10b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 110b57cec5SDimitry Andric #define __EMMINTRIN_H 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include <xmmintrin.h> 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 160b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 190b57cec5SDimitry Andric typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric /* Type defines. 
*/ 220b57cec5SDimitry Andric typedef double __v2df __attribute__ ((__vector_size__ (16))); 230b57cec5SDimitry Andric typedef long long __v2di __attribute__ ((__vector_size__ (16))); 240b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 250b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric /* Unsigned types */ 280b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); 290b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 300b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 330b57cec5SDimitry Andric * appear in the interface though. */ 340b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 370b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) 380b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the 410b57cec5SDimitry Andric /// sum in the lower 64 bits of the result. The upper 64 bits of the result 420b57cec5SDimitry Andric /// are copied from the upper double-precision value of the first operand. 430b57cec5SDimitry Andric /// 440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 450b57cec5SDimitry Andric /// 460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. 
470b57cec5SDimitry Andric /// 480b57cec5SDimitry Andric /// \param __a 490b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 500b57cec5SDimitry Andric /// \param __b 510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 520b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 530b57cec5SDimitry Andric /// sum of the lower 64 bits of both operands. The upper 64 bits are copied 540b57cec5SDimitry Andric /// from the upper 64 bits of the first source operand. 550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 560b57cec5SDimitry Andric _mm_add_sd(__m128d __a, __m128d __b) 570b57cec5SDimitry Andric { 580b57cec5SDimitry Andric __a[0] += __b[0]; 590b57cec5SDimitry Andric return __a; 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double]. 630b57cec5SDimitry Andric /// 640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 650b57cec5SDimitry Andric /// 660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 670b57cec5SDimitry Andric /// 680b57cec5SDimitry Andric /// \param __a 690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 700b57cec5SDimitry Andric /// \param __b 710b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 720b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 730b57cec5SDimitry Andric /// operands. 
740b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 750b57cec5SDimitry Andric _mm_add_pd(__m128d __a, __m128d __b) 760b57cec5SDimitry Andric { 770b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 810b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 820b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 830b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 840b57cec5SDimitry Andric /// operand. 850b57cec5SDimitry Andric /// 860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 870b57cec5SDimitry Andric /// 880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 890b57cec5SDimitry Andric /// 900b57cec5SDimitry Andric /// \param __a 910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 920b57cec5SDimitry Andric /// \param __b 930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 940b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 950b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 960b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 970b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 980b57cec5SDimitry Andric _mm_sub_sd(__m128d __a, __m128d __b) 990b57cec5SDimitry Andric { 1000b57cec5SDimitry Andric __a[0] -= __b[0]; 1010b57cec5SDimitry Andric return __a; 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 
1050b57cec5SDimitry Andric /// 1060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1070b57cec5SDimitry Andric /// 1080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 1090b57cec5SDimitry Andric /// 1100b57cec5SDimitry Andric /// \param __a 1110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1120b57cec5SDimitry Andric /// \param __b 1130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1140b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 1150b57cec5SDimitry Andric /// both operands. 1160b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1170b57cec5SDimitry Andric _mm_sub_pd(__m128d __a, __m128d __b) 1180b57cec5SDimitry Andric { 1190b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 1230b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 1240b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1250b57cec5SDimitry Andric /// operand. 1260b57cec5SDimitry Andric /// 1270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1280b57cec5SDimitry Andric /// 1290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 1300b57cec5SDimitry Andric /// 1310b57cec5SDimitry Andric /// \param __a 1320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1330b57cec5SDimitry Andric /// \param __b 1340b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
1350b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1360b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 1370b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 1380b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1390b57cec5SDimitry Andric _mm_mul_sd(__m128d __a, __m128d __b) 1400b57cec5SDimitry Andric { 1410b57cec5SDimitry Andric __a[0] *= __b[0]; 1420b57cec5SDimitry Andric return __a; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 1460b57cec5SDimitry Andric /// 1470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1480b57cec5SDimitry Andric /// 1490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 1500b57cec5SDimitry Andric /// 1510b57cec5SDimitry Andric /// \param __a 1520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1530b57cec5SDimitry Andric /// \param __b 1540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1550b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 1560b57cec5SDimitry Andric /// operands. 1570b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1580b57cec5SDimitry Andric _mm_mul_pd(__m128d __a, __m128d __b) 1590b57cec5SDimitry Andric { 1600b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 1640b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 1650b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. 
The upper 64 bits of the 1660b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1670b57cec5SDimitry Andric /// operand. 1680b57cec5SDimitry Andric /// 1690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1700b57cec5SDimitry Andric /// 1710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 1720b57cec5SDimitry Andric /// 1730b57cec5SDimitry Andric /// \param __a 1740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1750b57cec5SDimitry Andric /// \param __b 1760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 1770b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1780b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 1790b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 1800b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1810b57cec5SDimitry Andric _mm_div_sd(__m128d __a, __m128d __b) 1820b57cec5SDimitry Andric { 1830b57cec5SDimitry Andric __a[0] /= __b[0]; 1840b57cec5SDimitry Andric return __a; 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 1880b57cec5SDimitry Andric /// [2 x double]. 1890b57cec5SDimitry Andric /// 1900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1910b57cec5SDimitry Andric /// 1920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 1930b57cec5SDimitry Andric /// 1940b57cec5SDimitry Andric /// \param __a 1950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1960b57cec5SDimitry Andric /// \param __b 1970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 
1980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 1990b57cec5SDimitry Andric /// operands. 2000b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2010b57cec5SDimitry Andric _mm_div_pd(__m128d __a, __m128d __b) 2020b57cec5SDimitry Andric { 2030b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 2040b57cec5SDimitry Andric } 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 2070b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 2080b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 2090b57cec5SDimitry Andric /// double-precision value of the first operand. 2100b57cec5SDimitry Andric /// 2110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2120b57cec5SDimitry Andric /// 2130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 2140b57cec5SDimitry Andric /// 2150b57cec5SDimitry Andric /// \param __a 2160b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2170b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 2180b57cec5SDimitry Andric /// result. 2190b57cec5SDimitry Andric /// \param __b 2200b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2210b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 2220b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2230b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 2240b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 
2250b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2260b57cec5SDimitry Andric _mm_sqrt_sd(__m128d __a, __m128d __b) 2270b57cec5SDimitry Andric { 2280b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 2290b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 2330b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2340b57cec5SDimitry Andric /// 2350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2360b57cec5SDimitry Andric /// 2370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 2380b57cec5SDimitry Andric /// 2390b57cec5SDimitry Andric /// \param __a 2400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 2410b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 2420b57cec5SDimitry Andric /// values in the operand. 2430b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2440b57cec5SDimitry Andric _mm_sqrt_pd(__m128d __a) 2450b57cec5SDimitry Andric { 2460b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2500b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 2510b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 2520b57cec5SDimitry Andric /// double-precision value of the first operand. 2530b57cec5SDimitry Andric /// 2540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2550b57cec5SDimitry Andric /// 2560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 
2570b57cec5SDimitry Andric /// 2580b57cec5SDimitry Andric /// \param __a 2590b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2600b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2610b57cec5SDimitry Andric /// \param __b 2620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2630b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2640b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2650b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 2660b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 2670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2680b57cec5SDimitry Andric _mm_min_sd(__m128d __a, __m128d __b) 2690b57cec5SDimitry Andric { 2700b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 2710b57cec5SDimitry Andric } 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 2740b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the lesser of each pair of 2750b57cec5SDimitry Andric /// values. 2760b57cec5SDimitry Andric /// 2770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2780b57cec5SDimitry Andric /// 2790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 2800b57cec5SDimitry Andric /// 2810b57cec5SDimitry Andric /// \param __a 2820b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2830b57cec5SDimitry Andric /// \param __b 2840b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 
2850b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 2860b57cec5SDimitry Andric /// between both operands. 2870b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2880b57cec5SDimitry Andric _mm_min_pd(__m128d __a, __m128d __b) 2890b57cec5SDimitry Andric { 2900b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2940b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 2950b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 2960b57cec5SDimitry Andric /// double-precision value of the first operand. 2970b57cec5SDimitry Andric /// 2980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2990b57cec5SDimitry Andric /// 3000b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 3010b57cec5SDimitry Andric /// 3020b57cec5SDimitry Andric /// \param __a 3030b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3040b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3050b57cec5SDimitry Andric /// \param __b 3060b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3070b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3080b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3090b57cec5SDimitry Andric /// maximum value between both operands. The upper 64 bits are copied from 3100b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 
3110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3120b57cec5SDimitry Andric _mm_max_sd(__m128d __a, __m128d __b) 3130b57cec5SDimitry Andric { 3140b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 3180b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the greater of each pair 3190b57cec5SDimitry Andric /// of values. 3200b57cec5SDimitry Andric /// 3210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3220b57cec5SDimitry Andric /// 3230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 3240b57cec5SDimitry Andric /// 3250b57cec5SDimitry Andric /// \param __a 3260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3270b57cec5SDimitry Andric /// \param __b 3280b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3290b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 3300b57cec5SDimitry Andric /// between both operands. 3310b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3320b57cec5SDimitry Andric _mm_max_pd(__m128d __a, __m128d __b) 3330b57cec5SDimitry Andric { 3340b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 3380b57cec5SDimitry Andric /// 3390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3400b57cec5SDimitry Andric /// 3410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
3420b57cec5SDimitry Andric /// 3430b57cec5SDimitry Andric /// \param __a 3440b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3450b57cec5SDimitry Andric /// \param __b 3460b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3470b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3480b57cec5SDimitry Andric /// values between both operands. 3490b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3500b57cec5SDimitry Andric _mm_and_pd(__m128d __a, __m128d __b) 3510b57cec5SDimitry Andric { 3520b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 3560b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 3570b57cec5SDimitry Andric /// 3580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3590b57cec5SDimitry Andric /// 3600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 3610b57cec5SDimitry Andric /// 3620b57cec5SDimitry Andric /// \param __a 3630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. The 3640b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 3650b57cec5SDimitry Andric /// \param __b 3660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 3670b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3680b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 3690b57cec5SDimitry Andric /// operand. 
3700b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3710b57cec5SDimitry Andric _mm_andnot_pd(__m128d __a, __m128d __b) 3720b57cec5SDimitry Andric { 3730b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 3770b57cec5SDimitry Andric /// 3780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3790b57cec5SDimitry Andric /// 3800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 3810b57cec5SDimitry Andric /// 3820b57cec5SDimitry Andric /// \param __a 3830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3840b57cec5SDimitry Andric /// \param __b 3850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 3870b57cec5SDimitry Andric /// values between both operands. 3880b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3890b57cec5SDimitry Andric _mm_or_pd(__m128d __a, __m128d __b) 3900b57cec5SDimitry Andric { 3910b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 3950b57cec5SDimitry Andric /// 3960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3970b57cec5SDimitry Andric /// 3980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 3990b57cec5SDimitry Andric /// 4000b57cec5SDimitry Andric /// \param __a 4010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
4020b57cec5SDimitry Andric /// \param __b 4030b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 4040b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 4050b57cec5SDimitry Andric /// values between both operands. 4060b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4070b57cec5SDimitry Andric _mm_xor_pd(__m128d __a, __m128d __b) 4080b57cec5SDimitry Andric { 4090b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4130b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 4140b57cec5SDimitry Andric /// for false, 0xFFFFFFFFFFFFFFFF for true. 4150b57cec5SDimitry Andric /// 4160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4170b57cec5SDimitry Andric /// 4180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 4190b57cec5SDimitry Andric /// 4200b57cec5SDimitry Andric /// \param __a 4210b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4220b57cec5SDimitry Andric /// \param __b 4230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4240b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
4250b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4260b57cec5SDimitry Andric _mm_cmpeq_pd(__m128d __a, __m128d __b) 4270b57cec5SDimitry Andric { 4280b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4320b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4330b57cec5SDimitry Andric /// operand are less than those in the second operand. Each comparison 4340b57cec5SDimitry Andric /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4350b57cec5SDimitry Andric /// 4360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4370b57cec5SDimitry Andric /// 4380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4390b57cec5SDimitry Andric /// 4400b57cec5SDimitry Andric /// \param __a 4410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4420b57cec5SDimitry Andric /// \param __b 4430b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4440b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 4450b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4460b57cec5SDimitry Andric _mm_cmplt_pd(__m128d __a, __m128d __b) 4470b57cec5SDimitry Andric { 4480b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4520b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4530b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 4540b57cec5SDimitry Andric /// 4550b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4560b57cec5SDimitry Andric /// 4570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4580b57cec5SDimitry Andric /// 4590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4600b57cec5SDimitry Andric /// 4610b57cec5SDimitry Andric /// \param __a 4620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4630b57cec5SDimitry Andric /// \param __b 4640b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4650b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 4660b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4670b57cec5SDimitry Andric _mm_cmple_pd(__m128d __a, __m128d __b) 4680b57cec5SDimitry Andric { 4690b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4730b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4740b57cec5SDimitry Andric /// operand are greater than those in the second operand. 4750b57cec5SDimitry Andric /// 4760b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4770b57cec5SDimitry Andric /// 4780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4790b57cec5SDimitry Andric /// 4800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4810b57cec5SDimitry Andric /// 4820b57cec5SDimitry Andric /// \param __a 4830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4840b57cec5SDimitry Andric /// \param __b 4850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4860b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
4870b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4880b57cec5SDimitry Andric _mm_cmpgt_pd(__m128d __a, __m128d __b) 4890b57cec5SDimitry Andric { 4900b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 4910b57cec5SDimitry Andric } 4920b57cec5SDimitry Andric 4930b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4940b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4950b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 4960b57cec5SDimitry Andric /// 4970b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4980b57cec5SDimitry Andric /// 4990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5000b57cec5SDimitry Andric /// 5010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 5020b57cec5SDimitry Andric /// 5030b57cec5SDimitry Andric /// \param __a 5040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5050b57cec5SDimitry Andric /// \param __b 5060b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5070b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5090b57cec5SDimitry Andric _mm_cmpge_pd(__m128d __a, __m128d __b) 5100b57cec5SDimitry Andric { 5110b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5150b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5160b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 
5170b57cec5SDimitry Andric /// 5180b57cec5SDimitry Andric /// A pair of double-precision values are "ordered" with respect to each 5190b57cec5SDimitry Andric /// other if neither value is a NaN. Each comparison yields 0x0 for false, 5200b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 5210b57cec5SDimitry Andric /// 5220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5230b57cec5SDimitry Andric /// 5240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 5250b57cec5SDimitry Andric /// 5260b57cec5SDimitry Andric /// \param __a 5270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5280b57cec5SDimitry Andric /// \param __b 5290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5300b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5310b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5320b57cec5SDimitry Andric _mm_cmpord_pd(__m128d __a, __m128d __b) 5330b57cec5SDimitry Andric { 5340b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5380b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5390b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 5400b57cec5SDimitry Andric /// 5410b57cec5SDimitry Andric /// A pair of double-precision values are "unordered" with respect to each 5420b57cec5SDimitry Andric /// other if one or both values are NaN. Each comparison yields 0x0 for 5430b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 
5440b57cec5SDimitry Andric /// 5450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5460b57cec5SDimitry Andric /// 5470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 5480b57cec5SDimitry Andric /// instruction. 5490b57cec5SDimitry Andric /// 5500b57cec5SDimitry Andric /// \param __a 5510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5520b57cec5SDimitry Andric /// \param __b 5530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5540b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5560b57cec5SDimitry Andric _mm_cmpunord_pd(__m128d __a, __m128d __b) 5570b57cec5SDimitry Andric { 5580b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5620b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5630b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 5640b57cec5SDimitry Andric /// 5650b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5660b57cec5SDimitry Andric /// 5670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5680b57cec5SDimitry Andric /// 5690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 5700b57cec5SDimitry Andric /// 5710b57cec5SDimitry Andric /// \param __a 5720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5730b57cec5SDimitry Andric /// \param __b 5740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5750b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
5760b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5770b57cec5SDimitry Andric _mm_cmpneq_pd(__m128d __a, __m128d __b) 5780b57cec5SDimitry Andric { 5790b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5830b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5840b57cec5SDimitry Andric /// operand are not less than those in the second operand. 5850b57cec5SDimitry Andric /// 5860b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5870b57cec5SDimitry Andric /// 5880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5890b57cec5SDimitry Andric /// 5900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 5910b57cec5SDimitry Andric /// 5920b57cec5SDimitry Andric /// \param __a 5930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5940b57cec5SDimitry Andric /// \param __b 5950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5960b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5970b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5980b57cec5SDimitry Andric _mm_cmpnlt_pd(__m128d __a, __m128d __b) 5990b57cec5SDimitry Andric { 6000b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6040b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6050b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 
6060b57cec5SDimitry Andric /// 6070b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6080b57cec5SDimitry Andric /// 6090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6100b57cec5SDimitry Andric /// 6110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6120b57cec5SDimitry Andric /// 6130b57cec5SDimitry Andric /// \param __a 6140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6150b57cec5SDimitry Andric /// \param __b 6160b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6170b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 6180b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6190b57cec5SDimitry Andric _mm_cmpnle_pd(__m128d __a, __m128d __b) 6200b57cec5SDimitry Andric { 6210b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6250b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6260b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 6270b57cec5SDimitry Andric /// 6280b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6290b57cec5SDimitry Andric /// 6300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6310b57cec5SDimitry Andric /// 6320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6330b57cec5SDimitry Andric /// 6340b57cec5SDimitry Andric /// \param __a 6350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6360b57cec5SDimitry Andric /// \param __b 6370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6380b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
6390b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6400b57cec5SDimitry Andric _mm_cmpngt_pd(__m128d __a, __m128d __b) 6410b57cec5SDimitry Andric { 6420b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6460b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6470b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 6480b57cec5SDimitry Andric /// 6490b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6500b57cec5SDimitry Andric /// 6510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6520b57cec5SDimitry Andric /// 6530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6540b57cec5SDimitry Andric /// 6550b57cec5SDimitry Andric /// \param __a 6560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6570b57cec5SDimitry Andric /// \param __b 6580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6590b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 6600b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6610b57cec5SDimitry Andric _mm_cmpnge_pd(__m128d __a, __m128d __b) 6620b57cec5SDimitry Andric { 6630b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 6640b57cec5SDimitry Andric } 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6670b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 6680b57cec5SDimitry Andric /// 6690b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
6700b57cec5SDimitry Andric /// 6710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6720b57cec5SDimitry Andric /// 6730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 6740b57cec5SDimitry Andric /// 6750b57cec5SDimitry Andric /// \param __a 6760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6770b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6780b57cec5SDimitry Andric /// \param __b 6790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6800b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6810b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 6820b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 6830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6840b57cec5SDimitry Andric _mm_cmpeq_sd(__m128d __a, __m128d __b) 6850b57cec5SDimitry Andric { 6860b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric 6890b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6900b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6910b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 6920b57cec5SDimitry Andric /// the second parameter. 6930b57cec5SDimitry Andric /// 6940b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6950b57cec5SDimitry Andric /// 6960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6970b57cec5SDimitry Andric /// 6980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 
6990b57cec5SDimitry Andric /// 7000b57cec5SDimitry Andric /// \param __a 7010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7020b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7030b57cec5SDimitry Andric /// \param __b 7040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7050b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7060b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7070b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7090b57cec5SDimitry Andric _mm_cmplt_sd(__m128d __a, __m128d __b) 7100b57cec5SDimitry Andric { 7110b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7150b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7160b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 7170b57cec5SDimitry Andric /// corresponding value in the second parameter. 7180b57cec5SDimitry Andric /// 7190b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7200b57cec5SDimitry Andric /// 7210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7220b57cec5SDimitry Andric /// 7230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7240b57cec5SDimitry Andric /// 7250b57cec5SDimitry Andric /// \param __a 7260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7270b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 
7280b57cec5SDimitry Andric /// \param __b 7290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7300b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7310b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7320b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7330b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7340b57cec5SDimitry Andric _mm_cmple_sd(__m128d __a, __m128d __b) 7350b57cec5SDimitry Andric { 7360b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7400b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7410b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 7420b57cec5SDimitry Andric /// in the second parameter. 7430b57cec5SDimitry Andric /// 7440b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7450b57cec5SDimitry Andric /// 7460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7470b57cec5SDimitry Andric /// 7480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7490b57cec5SDimitry Andric /// 7500b57cec5SDimitry Andric /// \param __a 7510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7520b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7530b57cec5SDimitry Andric /// \param __b 7540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7550b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 
7560b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7570b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7580b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7590b57cec5SDimitry Andric _mm_cmpgt_sd(__m128d __a, __m128d __b) 7600b57cec5SDimitry Andric { 7610b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 7620b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 7630b57cec5SDimitry Andric } 7640b57cec5SDimitry Andric 7650b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7660b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7670b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 7680b57cec5SDimitry Andric /// corresponding value in the second parameter. 7690b57cec5SDimitry Andric /// 7700b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7710b57cec5SDimitry Andric /// 7720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7730b57cec5SDimitry Andric /// 7740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7750b57cec5SDimitry Andric /// 7760b57cec5SDimitry Andric /// \param __a 7770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7780b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7790b57cec5SDimitry Andric /// \param __b 7800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7810b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7820b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7830b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 7840b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7850b57cec5SDimitry Andric _mm_cmpge_sd(__m128d __a, __m128d __b) 7860b57cec5SDimitry Andric { 7870b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 7880b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7920b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7930b57cec5SDimitry Andric /// the value in the first parameter is "ordered" with respect to the 7940b57cec5SDimitry Andric /// corresponding value in the second parameter. 7950b57cec5SDimitry Andric /// 7960b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 7970b57cec5SDimitry Andric /// of double-precision values are "ordered" with respect to each other if 7980b57cec5SDimitry Andric /// neither value is a NaN. 7990b57cec5SDimitry Andric /// 8000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8010b57cec5SDimitry Andric /// 8020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 8030b57cec5SDimitry Andric /// 8040b57cec5SDimitry Andric /// \param __a 8050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8060b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8070b57cec5SDimitry Andric /// \param __b 8080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8100b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8110b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 8120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8130b57cec5SDimitry Andric _mm_cmpord_sd(__m128d __a, __m128d __b) 8140b57cec5SDimitry Andric { 8150b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 8160b57cec5SDimitry Andric } 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8190b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8200b57cec5SDimitry Andric /// the value in the first parameter is "unordered" with respect to the 8210b57cec5SDimitry Andric /// corresponding value in the second parameter. 8220b57cec5SDimitry Andric /// 8230b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8240b57cec5SDimitry Andric /// of double-precision values are "unordered" with respect to each other if 8250b57cec5SDimitry Andric /// one or both values are NaN. 8260b57cec5SDimitry Andric /// 8270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8280b57cec5SDimitry Andric /// 8290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 8300b57cec5SDimitry Andric /// instruction. 8310b57cec5SDimitry Andric /// 8320b57cec5SDimitry Andric /// \param __a 8330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8340b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8350b57cec5SDimitry Andric /// \param __b 8360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8380b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8390b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 8400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8410b57cec5SDimitry Andric _mm_cmpunord_sd(__m128d __a, __m128d __b) 8420b57cec5SDimitry Andric { 8430b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8470b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8480b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 8490b57cec5SDimitry Andric /// the second parameter. 8500b57cec5SDimitry Andric /// 8510b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8520b57cec5SDimitry Andric /// 8530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8540b57cec5SDimitry Andric /// 8550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 8560b57cec5SDimitry Andric /// 8570b57cec5SDimitry Andric /// \param __a 8580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8590b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8600b57cec5SDimitry Andric /// \param __b 8610b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8620b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8630b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8640b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
8650b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8660b57cec5SDimitry Andric _mm_cmpneq_sd(__m128d __a, __m128d __b) 8670b57cec5SDimitry Andric { 8680b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 8690b57cec5SDimitry Andric } 8700b57cec5SDimitry Andric 8710b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8720b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8730b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 8740b57cec5SDimitry Andric /// value in the second parameter. 8750b57cec5SDimitry Andric /// 8760b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8770b57cec5SDimitry Andric /// 8780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8790b57cec5SDimitry Andric /// 8800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8810b57cec5SDimitry Andric /// 8820b57cec5SDimitry Andric /// \param __a 8830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8840b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8850b57cec5SDimitry Andric /// \param __b 8860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8870b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8880b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8890b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
8900b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8910b57cec5SDimitry Andric _mm_cmpnlt_sd(__m128d __a, __m128d __b) 8920b57cec5SDimitry Andric { 8930b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 8940b57cec5SDimitry Andric } 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8970b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8980b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 8990b57cec5SDimitry Andric /// corresponding value in the second parameter. 9000b57cec5SDimitry Andric /// 9010b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9020b57cec5SDimitry Andric /// 9030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9040b57cec5SDimitry Andric /// 9050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9060b57cec5SDimitry Andric /// 9070b57cec5SDimitry Andric /// \param __a 9080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9100b57cec5SDimitry Andric /// \param __b 9110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9120b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9130b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9140b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9150b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9160b57cec5SDimitry Andric _mm_cmpnle_sd(__m128d __a, __m128d __b) 9170b57cec5SDimitry Andric { 9180b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 9190b57cec5SDimitry Andric } 9200b57cec5SDimitry Andric 9210b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9220b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9230b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 9240b57cec5SDimitry Andric /// value in the second parameter. 9250b57cec5SDimitry Andric /// 9260b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9270b57cec5SDimitry Andric /// 9280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9290b57cec5SDimitry Andric /// 9300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 9310b57cec5SDimitry Andric /// 9320b57cec5SDimitry Andric /// \param __a 9330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9340b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9350b57cec5SDimitry Andric /// \param __b 9360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9380b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9390b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9410b57cec5SDimitry Andric _mm_cmpngt_sd(__m128d __a, __m128d __b) 9420b57cec5SDimitry Andric { 9430b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 9440b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9480b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9490b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 9500b57cec5SDimitry Andric /// corresponding value in the second parameter. 9510b57cec5SDimitry Andric /// 9520b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9530b57cec5SDimitry Andric /// 9540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9550b57cec5SDimitry Andric /// 9560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9570b57cec5SDimitry Andric /// 9580b57cec5SDimitry Andric /// \param __a 9590b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9600b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9610b57cec5SDimitry Andric /// \param __b 9620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9630b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9640b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9650b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9660b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9670b57cec5SDimitry Andric _mm_cmpnge_sd(__m128d __a, __m128d __b) 9680b57cec5SDimitry Andric { 9690b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 9700b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 9710b57cec5SDimitry Andric } 9720b57cec5SDimitry Andric 9730b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9740b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 9750b57cec5SDimitry Andric /// 9760b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9770b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9780b57cec5SDimitry Andric /// 9790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9800b57cec5SDimitry Andric /// 9810b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9820b57cec5SDimitry Andric /// 9830b57cec5SDimitry Andric /// \param __a 9840b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9850b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9860b57cec5SDimitry Andric /// \param __b 9870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9880b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9890b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9900b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
9910b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 9920b57cec5SDimitry Andric _mm_comieq_sd(__m128d __a, __m128d __b) 9930b57cec5SDimitry Andric { 9940b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9980b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9990b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 10000b57cec5SDimitry Andric /// the second parameter. 10010b57cec5SDimitry Andric /// 10020b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10030b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10040b57cec5SDimitry Andric /// 10050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10060b57cec5SDimitry Andric /// 10070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10080b57cec5SDimitry Andric /// 10090b57cec5SDimitry Andric /// \param __a 10100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10110b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10120b57cec5SDimitry Andric /// \param __b 10130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10140b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10150b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10160b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10170b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10180b57cec5SDimitry Andric _mm_comilt_sd(__m128d __a, __m128d __b) 10190b57cec5SDimitry Andric { 10200b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10240b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10250b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 10260b57cec5SDimitry Andric /// corresponding value in the second parameter. 10270b57cec5SDimitry Andric /// 10280b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10290b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10300b57cec5SDimitry Andric /// 10310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10320b57cec5SDimitry Andric /// 10330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10340b57cec5SDimitry Andric /// 10350b57cec5SDimitry Andric /// \param __a 10360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10380b57cec5SDimitry Andric /// \param __b 10390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10400b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10410b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10420b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10430b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10440b57cec5SDimitry Andric _mm_comile_sd(__m128d __a, __m128d __b) 10450b57cec5SDimitry Andric { 10460b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 10470b57cec5SDimitry Andric } 10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10500b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10510b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 10520b57cec5SDimitry Andric /// in the second parameter. 10530b57cec5SDimitry Andric /// 10540b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10550b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10560b57cec5SDimitry Andric /// 10570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10580b57cec5SDimitry Andric /// 10590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10600b57cec5SDimitry Andric /// 10610b57cec5SDimitry Andric /// \param __a 10620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10630b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10640b57cec5SDimitry Andric /// \param __b 10650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10660b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10670b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10680b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10690b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10700b57cec5SDimitry Andric _mm_comigt_sd(__m128d __a, __m128d __b) 10710b57cec5SDimitry Andric { 10720b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 10730b57cec5SDimitry Andric } 10740b57cec5SDimitry Andric 10750b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10760b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10770b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 10780b57cec5SDimitry Andric /// corresponding value in the second parameter. 10790b57cec5SDimitry Andric /// 10800b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10810b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10820b57cec5SDimitry Andric /// 10830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10840b57cec5SDimitry Andric /// 10850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10860b57cec5SDimitry Andric /// 10870b57cec5SDimitry Andric /// \param __a 10880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10900b57cec5SDimitry Andric /// \param __b 10910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10920b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10930b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10940b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10950b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10960b57cec5SDimitry Andric _mm_comige_sd(__m128d __a, __m128d __b) 10970b57cec5SDimitry Andric { 10980b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11020b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11030b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 11040b57cec5SDimitry Andric /// the second parameter. 11050b57cec5SDimitry Andric /// 11060b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 11070b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 11080b57cec5SDimitry Andric /// 11090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11100b57cec5SDimitry Andric /// 11110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 11120b57cec5SDimitry Andric /// 11130b57cec5SDimitry Andric /// \param __a 11140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11150b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11160b57cec5SDimitry Andric /// \param __b 11170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11180b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11190b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11200b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
11210b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11220b57cec5SDimitry Andric _mm_comineq_sd(__m128d __a, __m128d __b) 11230b57cec5SDimitry Andric { 11240b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 11250b57cec5SDimitry Andric } 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11280b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. The 11290b57cec5SDimitry Andric /// comparison yields 0 for false, 1 for true. 11300b57cec5SDimitry Andric /// 11310b57cec5SDimitry Andric /// If either of the two lower double-precision values is NaN, 0 is returned. 11320b57cec5SDimitry Andric /// 11330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11340b57cec5SDimitry Andric /// 11350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11360b57cec5SDimitry Andric /// 11370b57cec5SDimitry Andric /// \param __a 11380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11390b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11400b57cec5SDimitry Andric /// \param __b 11410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11420b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11430b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11440b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
11450b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11460b57cec5SDimitry Andric _mm_ucomieq_sd(__m128d __a, __m128d __b) 11470b57cec5SDimitry Andric { 11480b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 11490b57cec5SDimitry Andric } 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11520b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11530b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 11540b57cec5SDimitry Andric /// the second parameter. 11550b57cec5SDimitry Andric /// 11560b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11570b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11580b57cec5SDimitry Andric /// 11590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11600b57cec5SDimitry Andric /// 11610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11620b57cec5SDimitry Andric /// 11630b57cec5SDimitry Andric /// \param __a 11640b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11650b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11660b57cec5SDimitry Andric /// \param __b 11670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11680b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11690b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11700b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
11710b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11720b57cec5SDimitry Andric _mm_ucomilt_sd(__m128d __a, __m128d __b) 11730b57cec5SDimitry Andric { 11740b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 11750b57cec5SDimitry Andric } 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11780b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11790b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 11800b57cec5SDimitry Andric /// corresponding value in the second parameter. 11810b57cec5SDimitry Andric /// 11820b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11830b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11840b57cec5SDimitry Andric /// 11850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11860b57cec5SDimitry Andric /// 11870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11880b57cec5SDimitry Andric /// 11890b57cec5SDimitry Andric /// \param __a 11900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11910b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11920b57cec5SDimitry Andric /// \param __b 11930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11940b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11950b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11960b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
11970b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11980b57cec5SDimitry Andric _mm_ucomile_sd(__m128d __a, __m128d __b) 11990b57cec5SDimitry Andric { 12000b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 12010b57cec5SDimitry Andric } 12020b57cec5SDimitry Andric 12030b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12040b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12050b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 12060b57cec5SDimitry Andric /// in the second parameter. 12070b57cec5SDimitry Andric /// 12080b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12090b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 12100b57cec5SDimitry Andric /// 12110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12120b57cec5SDimitry Andric /// 12130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12140b57cec5SDimitry Andric /// 12150b57cec5SDimitry Andric /// \param __a 12160b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12170b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12180b57cec5SDimitry Andric /// \param __b 12190b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12200b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12210b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12220b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
12230b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12240b57cec5SDimitry Andric _mm_ucomigt_sd(__m128d __a, __m128d __b) 12250b57cec5SDimitry Andric { 12260b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 12270b57cec5SDimitry Andric } 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12300b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12310b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 12320b57cec5SDimitry Andric /// corresponding value in the second parameter. 12330b57cec5SDimitry Andric /// 12340b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 12350b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 12360b57cec5SDimitry Andric /// 12370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12380b57cec5SDimitry Andric /// 12390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12400b57cec5SDimitry Andric /// 12410b57cec5SDimitry Andric /// \param __a 12420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12430b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12440b57cec5SDimitry Andric /// \param __b 12450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12460b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12470b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12480b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
12490b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12500b57cec5SDimitry Andric _mm_ucomige_sd(__m128d __a, __m128d __b) 12510b57cec5SDimitry Andric { 12520b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 12530b57cec5SDimitry Andric } 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12560b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12570b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 12580b57cec5SDimitry Andric /// the second parameter. 12590b57cec5SDimitry Andric /// 12600b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12610b57cec5SDimitry Andric /// double-precision values is NaN, 1 is returned. 12620b57cec5SDimitry Andric /// 12630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12640b57cec5SDimitry Andric /// 12650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12660b57cec5SDimitry Andric /// 12670b57cec5SDimitry Andric /// \param __a 12680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12690b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12700b57cec5SDimitry Andric /// \param __b 12710b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12720b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12730b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two 12740b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
12750b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12760b57cec5SDimitry Andric _mm_ucomineq_sd(__m128d __a, __m128d __b) 12770b57cec5SDimitry Andric { 12780b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12820b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 12830b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 12840b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 12850b57cec5SDimitry Andric /// 12860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12870b57cec5SDimitry Andric /// 12880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 12890b57cec5SDimitry Andric /// 12900b57cec5SDimitry Andric /// \param __a 12910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 12920b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 12930b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 12940b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 12950b57cec5SDimitry Andric _mm_cvtpd_ps(__m128d __a) 12960b57cec5SDimitry Andric { 12970b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 12980b57cec5SDimitry Andric } 12990b57cec5SDimitry Andric 13000b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 13010b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 13020b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 13030b57cec5SDimitry Andric /// elements of the input vector are unused. 
13040b57cec5SDimitry Andric /// 13050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13060b57cec5SDimitry Andric /// 13070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 13080b57cec5SDimitry Andric /// 13090b57cec5SDimitry Andric /// \param __a 13100b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 13110b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 13120b57cec5SDimitry Andric /// upper two elements are unused. 13130b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 13140b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 13150b57cec5SDimitry Andric _mm_cvtps_pd(__m128 __a) 13160b57cec5SDimitry Andric { 13170b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 13180b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 13190b57cec5SDimitry Andric } 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 13220b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 13230b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 13240b57cec5SDimitry Andric /// 13250b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 13260b57cec5SDimitry Andric /// 13270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13280b57cec5SDimitry Andric /// 13290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 13300b57cec5SDimitry Andric /// 13310b57cec5SDimitry Andric /// \param __a 13320b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 13330b57cec5SDimitry Andric /// converted to double-precision values. 
13340b57cec5SDimitry Andric /// 13350b57cec5SDimitry Andric /// The upper two elements are unused. 13360b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 13370b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 13380b57cec5SDimitry Andric _mm_cvtepi32_pd(__m128i __a) 13390b57cec5SDimitry Andric { 13400b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 13410b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 13420b57cec5SDimitry Andric } 13430b57cec5SDimitry Andric 13440b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 13450b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 13460b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 13470b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 13480b57cec5SDimitry Andric /// 13490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13500b57cec5SDimitry Andric /// 13510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 13520b57cec5SDimitry Andric /// 13530b57cec5SDimitry Andric /// \param __a 13540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 13550b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 13560b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 13570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 13580b57cec5SDimitry Andric _mm_cvtpd_epi32(__m128d __a) 13590b57cec5SDimitry Andric { 13600b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 13610b57cec5SDimitry Andric } 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 13640b57cec5SDimitry Andric /// into a 32-bit signed integer value. 
13650b57cec5SDimitry Andric /// 13660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13670b57cec5SDimitry Andric /// 13680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 13690b57cec5SDimitry Andric /// 13700b57cec5SDimitry Andric /// \param __a 13710b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 13720b57cec5SDimitry Andric /// conversion. 13730b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 13740b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 13750b57cec5SDimitry Andric _mm_cvtsd_si32(__m128d __a) 13760b57cec5SDimitry Andric { 13770b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 13780b57cec5SDimitry Andric } 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 13810b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 13820b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 13830b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 13840b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 13850b57cec5SDimitry Andric /// 13860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13870b57cec5SDimitry Andric /// 13880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 13890b57cec5SDimitry Andric /// 13900b57cec5SDimitry Andric /// \param __a 13910b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 13920b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 13930b57cec5SDimitry Andric /// \param __b 13940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision 13950b57cec5SDimitry Andric /// floating-point element is used in the conversion. 13960b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 13970b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 13980b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 13990b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 14000b57cec5SDimitry Andric _mm_cvtsd_ss(__m128 __a, __m128d __b) 14010b57cec5SDimitry Andric { 14020b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 14030b57cec5SDimitry Andric } 14040b57cec5SDimitry Andric 14050b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 14060b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 14070b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 14080b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 14090b57cec5SDimitry Andric /// 14100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14110b57cec5SDimitry Andric /// 14120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 14130b57cec5SDimitry Andric /// 14140b57cec5SDimitry Andric /// \param __a 14150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 14160b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 14170b57cec5SDimitry Andric /// \param __b 14180b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 14190b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 14200b57cec5SDimitry Andric /// converted value from the second parameter. 
The upper 64 bits are copied 14210b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 14220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 14230b57cec5SDimitry Andric _mm_cvtsi32_sd(__m128d __a, int __b) 14240b57cec5SDimitry Andric { 14250b57cec5SDimitry Andric __a[0] = __b; 14260b57cec5SDimitry Andric return __a; 14270b57cec5SDimitry Andric } 14280b57cec5SDimitry Andric 14290b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 14300b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 14310b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 14320b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 14330b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 14340b57cec5SDimitry Andric /// 14350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14360b57cec5SDimitry Andric /// 14370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 14380b57cec5SDimitry Andric /// 14390b57cec5SDimitry Andric /// \param __a 14400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 14410b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 14420b57cec5SDimitry Andric /// \param __b 14430b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 14440b57cec5SDimitry Andric /// floating-point element is used in the conversion. 14450b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 14460b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 14470b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
14480b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 14490b57cec5SDimitry Andric _mm_cvtss_sd(__m128d __a, __m128 __b) 14500b57cec5SDimitry Andric { 14510b57cec5SDimitry Andric __a[0] = __b[0]; 14520b57cec5SDimitry Andric return __a; 14530b57cec5SDimitry Andric } 14540b57cec5SDimitry Andric 14550b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14560b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14570b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. 14580b57cec5SDimitry Andric /// 14590b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14600b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. The upper 14610b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 14620b57cec5SDimitry Andric /// 14630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14640b57cec5SDimitry Andric /// 14650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 14660b57cec5SDimitry Andric /// instruction. 14670b57cec5SDimitry Andric /// 14680b57cec5SDimitry Andric /// \param __a 14690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14700b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 14710b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 
14720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 14730b57cec5SDimitry Andric _mm_cvttpd_epi32(__m128d __a) 14740b57cec5SDimitry Andric { 14750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 14760b57cec5SDimitry Andric } 14770b57cec5SDimitry Andric 14780b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 14790b57cec5SDimitry Andric /// signed integer value, truncating the result when it is inexact. 14800b57cec5SDimitry Andric /// 14810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14820b57cec5SDimitry Andric /// 14830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 14840b57cec5SDimitry Andric /// instruction. 14850b57cec5SDimitry Andric /// 14860b57cec5SDimitry Andric /// \param __a 14870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 14880b57cec5SDimitry Andric /// conversion. 14890b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 14900b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 14910b57cec5SDimitry Andric _mm_cvttsd_si32(__m128d __a) 14920b57cec5SDimitry Andric { 14930b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 14940b57cec5SDimitry Andric } 14950b57cec5SDimitry Andric 14960b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14970b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14980b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 14990b57cec5SDimitry Andric /// 15000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15010b57cec5SDimitry Andric /// 15020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 15030b57cec5SDimitry Andric /// 15040b57cec5SDimitry Andric /// \param __a 15050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
15060b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 15070b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 15080b57cec5SDimitry Andric _mm_cvtpd_pi32(__m128d __a) 15090b57cec5SDimitry Andric { 15100b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 15110b57cec5SDimitry Andric } 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 15140b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 15150b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 15160b57cec5SDimitry Andric /// 15170b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 15180b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. 15190b57cec5SDimitry Andric /// 15200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15210b57cec5SDimitry Andric /// 15220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 15230b57cec5SDimitry Andric /// 15240b57cec5SDimitry Andric /// \param __a 15250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 15260b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 15270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 15280b57cec5SDimitry Andric _mm_cvttpd_pi32(__m128d __a) 15290b57cec5SDimitry Andric { 15300b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 15310b57cec5SDimitry Andric } 15320b57cec5SDimitry Andric 15330b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 15340b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 15350b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 
15360b57cec5SDimitry Andric /// 15370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15380b57cec5SDimitry Andric /// 15390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 15400b57cec5SDimitry Andric /// 15410b57cec5SDimitry Andric /// \param __a 15420b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 15430b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 15440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX 15450b57cec5SDimitry Andric _mm_cvtpi32_pd(__m64 __a) 15460b57cec5SDimitry Andric { 15470b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 15480b57cec5SDimitry Andric } 15490b57cec5SDimitry Andric 15500b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 15510b57cec5SDimitry Andric /// a double-precision floating-point value. 15520b57cec5SDimitry Andric /// 15530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15540b57cec5SDimitry Andric /// 15550b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 15560b57cec5SDimitry Andric /// 15570b57cec5SDimitry Andric /// \param __a 15580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 15590b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 15600b57cec5SDimitry Andric /// bits of \a __a. 15610b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS 15620b57cec5SDimitry Andric _mm_cvtsd_f64(__m128d __a) 15630b57cec5SDimitry Andric { 15640b57cec5SDimitry Andric return __a[0]; 15650b57cec5SDimitry Andric } 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 15680b57cec5SDimitry Andric /// memory location. 
15690b57cec5SDimitry Andric /// 15700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15710b57cec5SDimitry Andric /// 15720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 15730b57cec5SDimitry Andric /// 15740b57cec5SDimitry Andric /// \param __dp 15750b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 15760b57cec5SDimitry Andric /// location has to be 16-byte aligned. 15770b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 15780b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 15790b57cec5SDimitry Andric _mm_load_pd(double const *__dp) 15800b57cec5SDimitry Andric { 1581480093f4SDimitry Andric return *(const __m128d*)__dp; 15820b57cec5SDimitry Andric } 15830b57cec5SDimitry Andric 15840b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 15850b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 15860b57cec5SDimitry Andric /// [2 x double]. 15870b57cec5SDimitry Andric /// 15880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15890b57cec5SDimitry Andric /// 15900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 15910b57cec5SDimitry Andric /// 15920b57cec5SDimitry Andric /// \param __dp 15930b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 15940b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 15950b57cec5SDimitry Andric /// duplicated values. 
15960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 15970b57cec5SDimitry Andric _mm_load1_pd(double const *__dp) 15980b57cec5SDimitry Andric { 15990b57cec5SDimitry Andric struct __mm_load1_pd_struct { 16000b57cec5SDimitry Andric double __u; 16010b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1602480093f4SDimitry Andric double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u; 16030b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __u }; 16040b57cec5SDimitry Andric } 16050b57cec5SDimitry Andric 16060b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 16070b57cec5SDimitry Andric 16080b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 16090b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 16100b57cec5SDimitry Andric /// 16110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16120b57cec5SDimitry Andric /// 16130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 16140b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 16150b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 16160b57cec5SDimitry Andric /// 16170b57cec5SDimitry Andric /// \param __dp 16180b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 16190b57cec5SDimitry Andric /// loaded in reverse order. 16200b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 16210b57cec5SDimitry Andric /// values. 
16220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 16230b57cec5SDimitry Andric _mm_loadr_pd(double const *__dp) 16240b57cec5SDimitry Andric { 1625480093f4SDimitry Andric __m128d __u = *(const __m128d*)__dp; 16260b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 16270b57cec5SDimitry Andric } 16280b57cec5SDimitry Andric 16290b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 16300b57cec5SDimitry Andric /// unaligned memory location. 16310b57cec5SDimitry Andric /// 16320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16330b57cec5SDimitry Andric /// 16340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 16350b57cec5SDimitry Andric /// 16360b57cec5SDimitry Andric /// \param __dp 16370b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 16380b57cec5SDimitry Andric /// location does not have to be aligned. 16390b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 16400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 16410b57cec5SDimitry Andric _mm_loadu_pd(double const *__dp) 16420b57cec5SDimitry Andric { 16430b57cec5SDimitry Andric struct __loadu_pd { 16440b57cec5SDimitry Andric __m128d_u __v; 16450b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1646480093f4SDimitry Andric return ((const struct __loadu_pd*)__dp)->__v; 16470b57cec5SDimitry Andric } 16480b57cec5SDimitry Andric 16490b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 16500b57cec5SDimitry Andric /// vector and clears the upper element. 16510b57cec5SDimitry Andric /// 16520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16530b57cec5SDimitry Andric /// 16540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 
16550b57cec5SDimitry Andric /// 16560b57cec5SDimitry Andric /// \param __a 16570b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 16580b57cec5SDimitry Andric /// location does not have to be aligned. 16590b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 16600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 16610b57cec5SDimitry Andric _mm_loadu_si64(void const *__a) 16620b57cec5SDimitry Andric { 16630b57cec5SDimitry Andric struct __loadu_si64 { 16640b57cec5SDimitry Andric long long __v; 16650b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1666480093f4SDimitry Andric long long __u = ((const struct __loadu_si64*)__a)->__v; 16670b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){__u, 0LL}; 16680b57cec5SDimitry Andric } 16690b57cec5SDimitry Andric 16700b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 16710b57cec5SDimitry Andric /// vector and clears the upper element. 16720b57cec5SDimitry Andric /// 16730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16740b57cec5SDimitry Andric /// 16750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 16760b57cec5SDimitry Andric /// 16770b57cec5SDimitry Andric /// \param __a 16780b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 16790b57cec5SDimitry Andric /// location does not have to be aligned. 16800b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
16810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 16820b57cec5SDimitry Andric _mm_loadu_si32(void const *__a) 16830b57cec5SDimitry Andric { 16840b57cec5SDimitry Andric struct __loadu_si32 { 16850b57cec5SDimitry Andric int __v; 16860b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1687480093f4SDimitry Andric int __u = ((const struct __loadu_si32*)__a)->__v; 16880b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; 16890b57cec5SDimitry Andric } 16900b57cec5SDimitry Andric 16910b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 16920b57cec5SDimitry Andric /// vector and clears the upper element. 16930b57cec5SDimitry Andric /// 16940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16950b57cec5SDimitry Andric /// 16960b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 16970b57cec5SDimitry Andric /// 16980b57cec5SDimitry Andric /// \param __a 16990b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 17000b57cec5SDimitry Andric /// location does not have to be aligned. 17010b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 
17020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 17030b57cec5SDimitry Andric _mm_loadu_si16(void const *__a) 17040b57cec5SDimitry Andric { 17050b57cec5SDimitry Andric struct __loadu_si16 { 17060b57cec5SDimitry Andric short __v; 17070b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1708480093f4SDimitry Andric short __u = ((const struct __loadu_si16*)__a)->__v; 17090b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 17100b57cec5SDimitry Andric } 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 17130b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 17140b57cec5SDimitry Andric /// 17150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17160b57cec5SDimitry Andric /// 17170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 17180b57cec5SDimitry Andric /// 17190b57cec5SDimitry Andric /// \param __dp 17200b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 17210b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 17220b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 
17230b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17240b57cec5SDimitry Andric _mm_load_sd(double const *__dp) 17250b57cec5SDimitry Andric { 17260b57cec5SDimitry Andric struct __mm_load_sd_struct { 17270b57cec5SDimitry Andric double __u; 17280b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1729480093f4SDimitry Andric double __u = ((const struct __mm_load_sd_struct*)__dp)->__u; 17300b57cec5SDimitry Andric return __extension__ (__m128d){ __u, 0 }; 17310b57cec5SDimitry Andric } 17320b57cec5SDimitry Andric 17330b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 17340b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 17350b57cec5SDimitry Andric /// bits of the first operand. 17360b57cec5SDimitry Andric /// 17370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17380b57cec5SDimitry Andric /// 17390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 17400b57cec5SDimitry Andric /// 17410b57cec5SDimitry Andric /// \param __a 17420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17430b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 17440b57cec5SDimitry Andric /// \param __dp 17450b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17460b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 17470b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 17480b57cec5SDimitry Andric /// to be aligned. 17490b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
17500b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17510b57cec5SDimitry Andric _mm_loadh_pd(__m128d __a, double const *__dp) 17520b57cec5SDimitry Andric { 17530b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 17540b57cec5SDimitry Andric double __u; 17550b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1756480093f4SDimitry Andric double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u; 17570b57cec5SDimitry Andric return __extension__ (__m128d){ __a[0], __u }; 17580b57cec5SDimitry Andric } 17590b57cec5SDimitry Andric 17600b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 17610b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 17620b57cec5SDimitry Andric /// high-order bits of the first operand. 17630b57cec5SDimitry Andric /// 17640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17650b57cec5SDimitry Andric /// 17660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 17670b57cec5SDimitry Andric /// 17680b57cec5SDimitry Andric /// \param __a 17690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17700b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 17710b57cec5SDimitry Andric /// \param __dp 17720b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17730b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 17740b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 17750b57cec5SDimitry Andric /// be aligned. 17760b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
17770b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17780b57cec5SDimitry Andric _mm_loadl_pd(__m128d __a, double const *__dp) 17790b57cec5SDimitry Andric { 17800b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 17810b57cec5SDimitry Andric double __u; 17820b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1783480093f4SDimitry Andric double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u; 17840b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __a[1] }; 17850b57cec5SDimitry Andric } 17860b57cec5SDimitry Andric 17870b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 17880b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 17890b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 17900b57cec5SDimitry Andric /// actually used. 17910b57cec5SDimitry Andric /// 17920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17930b57cec5SDimitry Andric /// 17940b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 17950b57cec5SDimitry Andric /// 17960b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 17970b57cec5SDimitry Andric /// content. 17980b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17990b57cec5SDimitry Andric _mm_undefined_pd(void) 18000b57cec5SDimitry Andric { 18010b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric 18040b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 18050b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 18060b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 
18070b57cec5SDimitry Andric /// 18080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18090b57cec5SDimitry Andric /// 18100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 18110b57cec5SDimitry Andric /// 18120b57cec5SDimitry Andric /// \param __w 18130b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18140b57cec5SDimitry Andric /// bits of the result. 18150b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 18160b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 18170b57cec5SDimitry Andric /// set to zero. 18180b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18190b57cec5SDimitry Andric _mm_set_sd(double __w) 18200b57cec5SDimitry Andric { 18210b57cec5SDimitry Andric return __extension__ (__m128d){ __w, 0 }; 18220b57cec5SDimitry Andric } 18230b57cec5SDimitry Andric 18240b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 18250b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 18260b57cec5SDimitry Andric /// specified double-precision floating-point value. 18270b57cec5SDimitry Andric /// 18280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18290b57cec5SDimitry Andric /// 18300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 18310b57cec5SDimitry Andric /// 18320b57cec5SDimitry Andric /// \param __w 18330b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 18340b57cec5SDimitry Andric /// element of the result. 18350b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
18360b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18370b57cec5SDimitry Andric _mm_set1_pd(double __w) 18380b57cec5SDimitry Andric { 18390b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __w }; 18400b57cec5SDimitry Andric } 18410b57cec5SDimitry Andric 18420b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 18430b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 18440b57cec5SDimitry Andric /// specified double-precision floating-point value. 18450b57cec5SDimitry Andric /// 18460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18470b57cec5SDimitry Andric /// 18480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 18490b57cec5SDimitry Andric /// 18500b57cec5SDimitry Andric /// \param __w 18510b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 18520b57cec5SDimitry Andric /// element of the result. 18530b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 18540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18550b57cec5SDimitry Andric _mm_set_pd1(double __w) 18560b57cec5SDimitry Andric { 18570b57cec5SDimitry Andric return _mm_set1_pd(__w); 18580b57cec5SDimitry Andric } 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18610b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 18620b57cec5SDimitry Andric /// 18630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18640b57cec5SDimitry Andric /// 18650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 
18660b57cec5SDimitry Andric /// 18670b57cec5SDimitry Andric /// \param __w 18680b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18690b57cec5SDimitry Andric /// bits of the result. 18700b57cec5SDimitry Andric /// \param __x 18710b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18720b57cec5SDimitry Andric /// bits of the result. 18730b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 18740b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18750b57cec5SDimitry Andric _mm_set_pd(double __w, double __x) 18760b57cec5SDimitry Andric { 18770b57cec5SDimitry Andric return __extension__ (__m128d){ __x, __w }; 18780b57cec5SDimitry Andric } 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 18810b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 18820b57cec5SDimitry Andric /// floating-point values. 18830b57cec5SDimitry Andric /// 18840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18850b57cec5SDimitry Andric /// 18860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 18870b57cec5SDimitry Andric /// 18880b57cec5SDimitry Andric /// \param __w 18890b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18900b57cec5SDimitry Andric /// bits of the result. 18910b57cec5SDimitry Andric /// \param __x 18920b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18930b57cec5SDimitry Andric /// bits of the result. 18940b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
18950b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18960b57cec5SDimitry Andric _mm_setr_pd(double __w, double __x) 18970b57cec5SDimitry Andric { 18980b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __x }; 18990b57cec5SDimitry Andric } 19000b57cec5SDimitry Andric 19010b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 19020b57cec5SDimitry Andric /// initialized to zero. 19030b57cec5SDimitry Andric /// 19040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19050b57cec5SDimitry Andric /// 19060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 19070b57cec5SDimitry Andric /// 19080b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 19090b57cec5SDimitry Andric /// all elements set to zero. 19100b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 19110b57cec5SDimitry Andric _mm_setzero_pd(void) 19120b57cec5SDimitry Andric { 19130b57cec5SDimitry Andric return __extension__ (__m128d){ 0, 0 }; 19140b57cec5SDimitry Andric } 19150b57cec5SDimitry Andric 19160b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 19170b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 19180b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 19190b57cec5SDimitry Andric /// 19200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19210b57cec5SDimitry Andric /// 19220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 19230b57cec5SDimitry Andric /// 19240b57cec5SDimitry Andric /// \param __a 19250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 19260b57cec5SDimitry Andric /// upper 64 bits of the result. 
19270b57cec5SDimitry Andric /// \param __b 19280b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 19290b57cec5SDimitry Andric /// lower 64 bits of the result. 19300b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 19310b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 19320b57cec5SDimitry Andric _mm_move_sd(__m128d __a, __m128d __b) 19330b57cec5SDimitry Andric { 19340b57cec5SDimitry Andric __a[0] = __b[0]; 19350b57cec5SDimitry Andric return __a; 19360b57cec5SDimitry Andric } 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 19390b57cec5SDimitry Andric /// memory location. 19400b57cec5SDimitry Andric /// 19410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19420b57cec5SDimitry Andric /// 19430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 19440b57cec5SDimitry Andric /// 19450b57cec5SDimitry Andric /// \param __dp 19460b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 19470b57cec5SDimitry Andric /// \param __a 19480b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 19490b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19500b57cec5SDimitry Andric _mm_store_sd(double *__dp, __m128d __a) 19510b57cec5SDimitry Andric { 19520b57cec5SDimitry Andric struct __mm_store_sd_struct { 19530b57cec5SDimitry Andric double __u; 19540b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19550b57cec5SDimitry Andric ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 19590b57cec5SDimitry Andric /// [2 x double] to a memory location. 
19600b57cec5SDimitry Andric /// 19610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19620b57cec5SDimitry Andric /// 19630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 19640b57cec5SDimitry Andric /// 19650b57cec5SDimitry Andric /// \param __dp 19660b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 19670b57cec5SDimitry Andric /// double-precision values. 19680b57cec5SDimitry Andric /// \param __a 19690b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 19700b57cec5SDimitry Andric /// moved. 19710b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19720b57cec5SDimitry Andric _mm_store_pd(double *__dp, __m128d __a) 19730b57cec5SDimitry Andric { 19740b57cec5SDimitry Andric *(__m128d*)__dp = __a; 19750b57cec5SDimitry Andric } 19760b57cec5SDimitry Andric 19770b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19780b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 19790b57cec5SDimitry Andric /// 19800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19810b57cec5SDimitry Andric /// 19820b57cec5SDimitry Andric /// This intrinsic corresponds to the 19830b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19840b57cec5SDimitry Andric /// 19850b57cec5SDimitry Andric /// \param __dp 19860b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19870b57cec5SDimitry Andric /// values. 19880b57cec5SDimitry Andric /// \param __a 19890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19900b57cec5SDimitry Andric /// of the values in \a __dp. 
19910b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19920b57cec5SDimitry Andric _mm_store1_pd(double *__dp, __m128d __a) 19930b57cec5SDimitry Andric { 19940b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 19950b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19990b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 20000b57cec5SDimitry Andric /// 20010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20020b57cec5SDimitry Andric /// 20030b57cec5SDimitry Andric /// This intrinsic corresponds to the 20040b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 20050b57cec5SDimitry Andric /// 20060b57cec5SDimitry Andric /// \param __dp 20070b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 20080b57cec5SDimitry Andric /// values. 20090b57cec5SDimitry Andric /// \param __a 20100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 20110b57cec5SDimitry Andric /// of the values in \a __dp. 20120b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20130b57cec5SDimitry Andric _mm_store_pd1(double *__dp, __m128d __a) 20140b57cec5SDimitry Andric { 20150b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 20160b57cec5SDimitry Andric } 20170b57cec5SDimitry Andric 20180b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 20190b57cec5SDimitry Andric /// location. 20200b57cec5SDimitry Andric /// 20210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20220b57cec5SDimitry Andric /// 20230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 
20240b57cec5SDimitry Andric /// 20250b57cec5SDimitry Andric /// \param __dp 20260b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 20270b57cec5SDimitry Andric /// location does not have to be aligned. 20280b57cec5SDimitry Andric /// \param __a 20290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 20300b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20310b57cec5SDimitry Andric _mm_storeu_pd(double *__dp, __m128d __a) 20320b57cec5SDimitry Andric { 20330b57cec5SDimitry Andric struct __storeu_pd { 20340b57cec5SDimitry Andric __m128d_u __v; 20350b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20360b57cec5SDimitry Andric ((struct __storeu_pd*)__dp)->__v = __a; 20370b57cec5SDimitry Andric } 20380b57cec5SDimitry Andric 20390b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 20400b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 20410b57cec5SDimitry Andric /// 20420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20430b57cec5SDimitry Andric /// 20440b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 20450b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 20460b57cec5SDimitry Andric /// 20470b57cec5SDimitry Andric /// \param __dp 20480b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 20490b57cec5SDimitry Andric /// double-precision values. 20500b57cec5SDimitry Andric /// \param __a 20510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 20520b57cec5SDimitry Andric /// stored. 
20530b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20540b57cec5SDimitry Andric _mm_storer_pd(double *__dp, __m128d __a) 20550b57cec5SDimitry Andric { 20560b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 20570b57cec5SDimitry Andric *(__m128d *)__dp = __a; 20580b57cec5SDimitry Andric } 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 20610b57cec5SDimitry Andric /// memory location. 20620b57cec5SDimitry Andric /// 20630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20640b57cec5SDimitry Andric /// 20650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 20660b57cec5SDimitry Andric /// 20670b57cec5SDimitry Andric /// \param __dp 20680b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 20690b57cec5SDimitry Andric /// \param __a 20700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 20710b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20720b57cec5SDimitry Andric _mm_storeh_pd(double *__dp, __m128d __a) 20730b57cec5SDimitry Andric { 20740b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20750b57cec5SDimitry Andric double __u; 20760b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20770b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; 20780b57cec5SDimitry Andric } 20790b57cec5SDimitry Andric 20800b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 20810b57cec5SDimitry Andric /// memory location. 20820b57cec5SDimitry Andric /// 20830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20840b57cec5SDimitry Andric /// 20850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 
20860b57cec5SDimitry Andric /// 20870b57cec5SDimitry Andric /// \param __dp 20880b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 20890b57cec5SDimitry Andric /// \param __a 20900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 20910b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20920b57cec5SDimitry Andric _mm_storel_pd(double *__dp, __m128d __a) 20930b57cec5SDimitry Andric { 20940b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20950b57cec5SDimitry Andric double __u; 20960b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20970b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; 20980b57cec5SDimitry Andric } 20990b57cec5SDimitry Andric 21000b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 21010b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 21020b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 21030b57cec5SDimitry Andric /// 21040b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21050b57cec5SDimitry Andric /// 21060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21070b57cec5SDimitry Andric /// 21080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 21090b57cec5SDimitry Andric /// 21100b57cec5SDimitry Andric /// \param __a 21110b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 21120b57cec5SDimitry Andric /// \param __b 21130b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 21140b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 21150b57cec5SDimitry Andric /// parameters. 
21160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21170b57cec5SDimitry Andric _mm_add_epi8(__m128i __a, __m128i __b) 21180b57cec5SDimitry Andric { 21190b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 21200b57cec5SDimitry Andric } 21210b57cec5SDimitry Andric 21220b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 21230b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 21240b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 21250b57cec5SDimitry Andric /// 21260b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21270b57cec5SDimitry Andric /// 21280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21290b57cec5SDimitry Andric /// 21300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 21310b57cec5SDimitry Andric /// 21320b57cec5SDimitry Andric /// \param __a 21330b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 21340b57cec5SDimitry Andric /// \param __b 21350b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 21360b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 21370b57cec5SDimitry Andric /// parameters. 21380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21390b57cec5SDimitry Andric _mm_add_epi16(__m128i __a, __m128i __b) 21400b57cec5SDimitry Andric { 21410b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 21420b57cec5SDimitry Andric } 21430b57cec5SDimitry Andric 21440b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 21450b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 21460b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 
21470b57cec5SDimitry Andric /// 21480b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21490b57cec5SDimitry Andric /// 21500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21510b57cec5SDimitry Andric /// 21520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 21530b57cec5SDimitry Andric /// 21540b57cec5SDimitry Andric /// \param __a 21550b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 21560b57cec5SDimitry Andric /// \param __b 21570b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 21580b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 21590b57cec5SDimitry Andric /// parameters. 21600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21610b57cec5SDimitry Andric _mm_add_epi32(__m128i __a, __m128i __b) 21620b57cec5SDimitry Andric { 21630b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 21640b57cec5SDimitry Andric } 21650b57cec5SDimitry Andric 21660b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 21670b57cec5SDimitry Andric /// lower 64 bits of the sum. 21680b57cec5SDimitry Andric /// 21690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21700b57cec5SDimitry Andric /// 21710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 21720b57cec5SDimitry Andric /// 21730b57cec5SDimitry Andric /// \param __a 21740b57cec5SDimitry Andric /// A 64-bit integer. 21750b57cec5SDimitry Andric /// \param __b 21760b57cec5SDimitry Andric /// A 64-bit integer. 21770b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
21780b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 21790b57cec5SDimitry Andric _mm_add_si64(__m64 __a, __m64 __b) 21800b57cec5SDimitry Andric { 21810b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 21820b57cec5SDimitry Andric } 21830b57cec5SDimitry Andric 21840b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 21850b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 21860b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 21870b57cec5SDimitry Andric /// 21880b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21890b57cec5SDimitry Andric /// 21900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21910b57cec5SDimitry Andric /// 21920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 21930b57cec5SDimitry Andric /// 21940b57cec5SDimitry Andric /// \param __a 21950b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 21960b57cec5SDimitry Andric /// \param __b 21970b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 21980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 21990b57cec5SDimitry Andric /// parameters. 22000b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22010b57cec5SDimitry Andric _mm_add_epi64(__m128i __a, __m128i __b) 22020b57cec5SDimitry Andric { 22030b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 22040b57cec5SDimitry Andric } 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22070b57cec5SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element of 22080b57cec5SDimitry Andric /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are 22090b57cec5SDimitry Andric /// saturated to 0x7F. 
Negative sums less than 0x80 are saturated to 0x80. 22100b57cec5SDimitry Andric /// 22110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22120b57cec5SDimitry Andric /// 22130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 22140b57cec5SDimitry Andric /// 22150b57cec5SDimitry Andric /// \param __a 22160b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 22170b57cec5SDimitry Andric /// \param __b 22180b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 22190b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 22200b57cec5SDimitry Andric /// both parameters. 22210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22220b57cec5SDimitry Andric _mm_adds_epi8(__m128i __a, __m128i __b) 22230b57cec5SDimitry Andric { 22240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); 22250b57cec5SDimitry Andric } 22260b57cec5SDimitry Andric 22270b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22280b57cec5SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element of 22290b57cec5SDimitry Andric /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF 22300b57cec5SDimitry Andric /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 22310b57cec5SDimitry Andric /// 0x8000. 22320b57cec5SDimitry Andric /// 22330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22340b57cec5SDimitry Andric /// 22350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 22360b57cec5SDimitry Andric /// 22370b57cec5SDimitry Andric /// \param __a 22380b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22390b57cec5SDimitry Andric /// \param __b 22400b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
22410b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 22420b57cec5SDimitry Andric /// both parameters. 22430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22440b57cec5SDimitry Andric _mm_adds_epi16(__m128i __a, __m128i __b) 22450b57cec5SDimitry Andric { 22460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); 22470b57cec5SDimitry Andric } 22480b57cec5SDimitry Andric 22490b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22500b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 22510b57cec5SDimitry Andric /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF 22520b57cec5SDimitry Andric /// are saturated to 0xFF. Negative sums are saturated to 0x00. 22530b57cec5SDimitry Andric /// 22540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22550b57cec5SDimitry Andric /// 22560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 22570b57cec5SDimitry Andric /// 22580b57cec5SDimitry Andric /// \param __a 22590b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22600b57cec5SDimitry Andric /// \param __b 22610b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22620b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 22630b57cec5SDimitry Andric /// of both parameters. 
22640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22650b57cec5SDimitry Andric _mm_adds_epu8(__m128i __a, __m128i __b) 22660b57cec5SDimitry Andric { 22670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); 22680b57cec5SDimitry Andric } 22690b57cec5SDimitry Andric 22700b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22710b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 22720b57cec5SDimitry Andric /// of a 128-bit result vector of [8 x i16]. Positive sums greater than 22730b57cec5SDimitry Andric /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. 22740b57cec5SDimitry Andric /// 22750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22760b57cec5SDimitry Andric /// 22770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 22780b57cec5SDimitry Andric /// 22790b57cec5SDimitry Andric /// \param __a 22800b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22810b57cec5SDimitry Andric /// \param __b 22820b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22830b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 22840b57cec5SDimitry Andric /// of both parameters. 22850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22860b57cec5SDimitry Andric _mm_adds_epu16(__m128i __a, __m128i __b) 22870b57cec5SDimitry Andric { 22880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); 22890b57cec5SDimitry Andric } 22900b57cec5SDimitry Andric 2291480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 22920b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 22930b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
22940b57cec5SDimitry Andric /// 22950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22960b57cec5SDimitry Andric /// 22970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 22980b57cec5SDimitry Andric /// 22990b57cec5SDimitry Andric /// \param __a 23000b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23010b57cec5SDimitry Andric /// \param __b 23020b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23030b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 23040b57cec5SDimitry Andric /// averages of both parameters. 23050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23060b57cec5SDimitry Andric _mm_avg_epu8(__m128i __a, __m128i __b) 23070b57cec5SDimitry Andric { 23080b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric 2311480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 23120b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 23130b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 23140b57cec5SDimitry Andric /// 23150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23160b57cec5SDimitry Andric /// 23170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 23180b57cec5SDimitry Andric /// 23190b57cec5SDimitry Andric /// \param __a 23200b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23210b57cec5SDimitry Andric /// \param __b 23220b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23230b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 23240b57cec5SDimitry Andric /// averages of both parameters. 
23250b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23260b57cec5SDimitry Andric _mm_avg_epu16(__m128i __a, __m128i __b) 23270b57cec5SDimitry Andric { 23280b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 23290b57cec5SDimitry Andric } 23300b57cec5SDimitry Andric 23310b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 23320b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 23330b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 23340b57cec5SDimitry Andric /// [4 x i32] vector. 23350b57cec5SDimitry Andric /// 23360b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 23370b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 23380b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 23390b57cec5SDimitry Andric /// of the result. 23400b57cec5SDimitry Andric /// 23410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23420b57cec5SDimitry Andric /// 23430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 23440b57cec5SDimitry Andric /// 23450b57cec5SDimitry Andric /// \param __a 23460b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23470b57cec5SDimitry Andric /// \param __b 23480b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23490b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 23500b57cec5SDimitry Andric /// of both parameters. 
23510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23520b57cec5SDimitry Andric _mm_madd_epi16(__m128i __a, __m128i __b) 23530b57cec5SDimitry Andric { 23540b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 23550b57cec5SDimitry Andric } 23560b57cec5SDimitry Andric 23570b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 23580b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 23590b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 23600b57cec5SDimitry Andric /// 23610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23620b57cec5SDimitry Andric /// 23630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 23640b57cec5SDimitry Andric /// 23650b57cec5SDimitry Andric /// \param __a 23660b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23670b57cec5SDimitry Andric /// \param __b 23680b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23690b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 23700b57cec5SDimitry Andric /// each comparison. 23710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23720b57cec5SDimitry Andric _mm_max_epi16(__m128i __a, __m128i __b) 23730b57cec5SDimitry Andric { 23740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); 23750b57cec5SDimitry Andric } 23760b57cec5SDimitry Andric 23770b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 23780b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 23790b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
23800b57cec5SDimitry Andric /// 23810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23820b57cec5SDimitry Andric /// 23830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 23840b57cec5SDimitry Andric /// 23850b57cec5SDimitry Andric /// \param __a 23860b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23870b57cec5SDimitry Andric /// \param __b 23880b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23890b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 23900b57cec5SDimitry Andric /// each comparison. 23910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23920b57cec5SDimitry Andric _mm_max_epu8(__m128i __a, __m128i __b) 23930b57cec5SDimitry Andric { 23940b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); 23950b57cec5SDimitry Andric } 23960b57cec5SDimitry Andric 23970b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 23980b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 23990b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 24000b57cec5SDimitry Andric /// 24010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24020b57cec5SDimitry Andric /// 24030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 24040b57cec5SDimitry Andric /// 24050b57cec5SDimitry Andric /// \param __a 24060b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24070b57cec5SDimitry Andric /// \param __b 24080b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24090b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 24100b57cec5SDimitry Andric /// each comparison. 
24110b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24120b57cec5SDimitry Andric _mm_min_epi16(__m128i __a, __m128i __b) 24130b57cec5SDimitry Andric { 24140b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); 24150b57cec5SDimitry Andric } 24160b57cec5SDimitry Andric 24170b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 24180b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 24190b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 24200b57cec5SDimitry Andric /// 24210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24220b57cec5SDimitry Andric /// 24230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 24240b57cec5SDimitry Andric /// 24250b57cec5SDimitry Andric /// \param __a 24260b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 24270b57cec5SDimitry Andric /// \param __b 24280b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 24290b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 24300b57cec5SDimitry Andric /// each comparison. 24310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24320b57cec5SDimitry Andric _mm_min_epu8(__m128i __a, __m128i __b) 24330b57cec5SDimitry Andric { 24340b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); 24350b57cec5SDimitry Andric } 24360b57cec5SDimitry Andric 24370b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 24380b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 24390b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 
24400b57cec5SDimitry Andric /// 24410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24420b57cec5SDimitry Andric /// 24430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 24440b57cec5SDimitry Andric /// 24450b57cec5SDimitry Andric /// \param __a 24460b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24470b57cec5SDimitry Andric /// \param __b 24480b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24490b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 24500b57cec5SDimitry Andric /// each of the eight 32-bit products. 24510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24520b57cec5SDimitry Andric _mm_mulhi_epi16(__m128i __a, __m128i __b) 24530b57cec5SDimitry Andric { 24540b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 24550b57cec5SDimitry Andric } 24560b57cec5SDimitry Andric 24570b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 24580b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 24590b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 24600b57cec5SDimitry Andric /// 24610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24620b57cec5SDimitry Andric /// 24630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 24640b57cec5SDimitry Andric /// 24650b57cec5SDimitry Andric /// \param __a 24660b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 24670b57cec5SDimitry Andric /// \param __b 24680b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 24690b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 24700b57cec5SDimitry Andric /// of each of the eight 32-bit products. 
24710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24720b57cec5SDimitry Andric _mm_mulhi_epu16(__m128i __a, __m128i __b) 24730b57cec5SDimitry Andric { 24740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 24750b57cec5SDimitry Andric } 24760b57cec5SDimitry Andric 24770b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 24780b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 24790b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 24800b57cec5SDimitry Andric /// 24810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24820b57cec5SDimitry Andric /// 24830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 24840b57cec5SDimitry Andric /// 24850b57cec5SDimitry Andric /// \param __a 24860b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24870b57cec5SDimitry Andric /// \param __b 24880b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24890b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 24900b57cec5SDimitry Andric /// each of the eight 32-bit products. 24910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24920b57cec5SDimitry Andric _mm_mullo_epi16(__m128i __a, __m128i __b) 24930b57cec5SDimitry Andric { 24940b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 24950b57cec5SDimitry Andric } 24960b57cec5SDimitry Andric 24970b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 24980b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 24990b57cec5SDimitry Andric /// product. 
25000b57cec5SDimitry Andric /// 25010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25020b57cec5SDimitry Andric /// 25030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 25040b57cec5SDimitry Andric /// 25050b57cec5SDimitry Andric /// \param __a 25060b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 25070b57cec5SDimitry Andric /// \param __b 25080b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 25090b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 25100b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 25110b57cec5SDimitry Andric _mm_mul_su32(__m64 __a, __m64 __b) 25120b57cec5SDimitry Andric { 25130b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 25140b57cec5SDimitry Andric } 25150b57cec5SDimitry Andric 25160b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 25170b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 25180b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 25190b57cec5SDimitry Andric /// 25200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25210b57cec5SDimitry Andric /// 25220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 25230b57cec5SDimitry Andric /// 25240b57cec5SDimitry Andric /// \param __a 25250b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 25260b57cec5SDimitry Andric /// \param __b 25270b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 25280b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 
25290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25300b57cec5SDimitry Andric _mm_mul_epu32(__m128i __a, __m128i __b) 25310b57cec5SDimitry Andric { 25320b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 25330b57cec5SDimitry Andric } 25340b57cec5SDimitry Andric 25350b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 25360b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 25370b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 25380b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 25390b57cec5SDimitry Andric /// [2 x i64] vector. 25400b57cec5SDimitry Andric /// 25410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25420b57cec5SDimitry Andric /// 25430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 25440b57cec5SDimitry Andric /// 25450b57cec5SDimitry Andric /// \param __a 25460b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25470b57cec5SDimitry Andric /// \param __b 25480b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25490b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 25500b57cec5SDimitry Andric /// differences between both operands. 25510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25520b57cec5SDimitry Andric _mm_sad_epu8(__m128i __a, __m128i __b) 25530b57cec5SDimitry Andric { 25540b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 25550b57cec5SDimitry Andric } 25560b57cec5SDimitry Andric 25570b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 
25580b57cec5SDimitry Andric /// 25590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25600b57cec5SDimitry Andric /// 25610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 25620b57cec5SDimitry Andric /// 25630b57cec5SDimitry Andric /// \param __a 25640b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25650b57cec5SDimitry Andric /// \param __b 25660b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25670b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25680b57cec5SDimitry Andric /// in the operands. 25690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25700b57cec5SDimitry Andric _mm_sub_epi8(__m128i __a, __m128i __b) 25710b57cec5SDimitry Andric { 25720b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 25730b57cec5SDimitry Andric } 25740b57cec5SDimitry Andric 25750b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 25760b57cec5SDimitry Andric /// 25770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25780b57cec5SDimitry Andric /// 25790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 25800b57cec5SDimitry Andric /// 25810b57cec5SDimitry Andric /// \param __a 25820b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25830b57cec5SDimitry Andric /// \param __b 25840b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25850b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25860b57cec5SDimitry Andric /// in the operands. 
25870b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25880b57cec5SDimitry Andric _mm_sub_epi16(__m128i __a, __m128i __b) 25890b57cec5SDimitry Andric { 25900b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 25910b57cec5SDimitry Andric } 25920b57cec5SDimitry Andric 25930b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 25940b57cec5SDimitry Andric /// 25950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25960b57cec5SDimitry Andric /// 25970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 25980b57cec5SDimitry Andric /// 25990b57cec5SDimitry Andric /// \param __a 26000b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26010b57cec5SDimitry Andric /// \param __b 26020b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26030b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26040b57cec5SDimitry Andric /// in the operands. 26050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26060b57cec5SDimitry Andric _mm_sub_epi32(__m128i __a, __m128i __b) 26070b57cec5SDimitry Andric { 26080b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 26090b57cec5SDimitry Andric } 26100b57cec5SDimitry Andric 26110b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 26120b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 26130b57cec5SDimitry Andric /// 26140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26150b57cec5SDimitry Andric /// 26160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 26170b57cec5SDimitry Andric /// 26180b57cec5SDimitry Andric /// \param __a 26190b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 
26200b57cec5SDimitry Andric /// \param __b 26210b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 26220b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 26230b57cec5SDimitry Andric /// the operands. 26240b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 26250b57cec5SDimitry Andric _mm_sub_si64(__m64 __a, __m64 __b) 26260b57cec5SDimitry Andric { 26270b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 26280b57cec5SDimitry Andric } 26290b57cec5SDimitry Andric 26300b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 26310b57cec5SDimitry Andric /// 26320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26330b57cec5SDimitry Andric /// 26340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 26350b57cec5SDimitry Andric /// 26360b57cec5SDimitry Andric /// \param __a 26370b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26380b57cec5SDimitry Andric /// \param __b 26390b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26400b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26410b57cec5SDimitry Andric /// in the operands. 26420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26430b57cec5SDimitry Andric _mm_sub_epi64(__m128i __a, __m128i __b) 26440b57cec5SDimitry Andric { 26450b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 26460b57cec5SDimitry Andric } 26470b57cec5SDimitry Andric 26480b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and 26490b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 
26500b57cec5SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences less 26510b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. 26520b57cec5SDimitry Andric /// 26530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26540b57cec5SDimitry Andric /// 26550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 26560b57cec5SDimitry Andric /// 26570b57cec5SDimitry Andric /// \param __a 26580b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26590b57cec5SDimitry Andric /// \param __b 26600b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26610b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26620b57cec5SDimitry Andric /// in the operands. 26630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26640b57cec5SDimitry Andric _mm_subs_epi8(__m128i __a, __m128i __b) 26650b57cec5SDimitry Andric { 26660b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); 26670b57cec5SDimitry Andric } 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and 26700b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 26710b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 26720b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 26730b57cec5SDimitry Andric /// 26740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26750b57cec5SDimitry Andric /// 26760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 26770b57cec5SDimitry Andric /// 26780b57cec5SDimitry Andric /// \param __a 26790b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 
26800b57cec5SDimitry Andric /// \param __b 26810b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26820b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26830b57cec5SDimitry Andric /// in the operands. 26840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26850b57cec5SDimitry Andric _mm_subs_epi16(__m128i __a, __m128i __b) 26860b57cec5SDimitry Andric { 26870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric 26900b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input 26910b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 26920b57cec5SDimitry Andric /// destination. Differences less than 0x00 are saturated to 0x00. 26930b57cec5SDimitry Andric /// 26940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26950b57cec5SDimitry Andric /// 26960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 26970b57cec5SDimitry Andric /// 26980b57cec5SDimitry Andric /// \param __a 26990b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 27000b57cec5SDimitry Andric /// \param __b 27010b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 27020b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 27030b57cec5SDimitry Andric /// differences of the values in the operands. 
27040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27050b57cec5SDimitry Andric _mm_subs_epu8(__m128i __a, __m128i __b) 27060b57cec5SDimitry Andric { 27070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); 27080b57cec5SDimitry Andric } 27090b57cec5SDimitry Andric 27100b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input 27110b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 27120b57cec5SDimitry Andric /// destination. Differences less than 0x0000 are saturated to 0x0000. 27130b57cec5SDimitry Andric /// 27140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27150b57cec5SDimitry Andric /// 27160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 27170b57cec5SDimitry Andric /// 27180b57cec5SDimitry Andric /// \param __a 27190b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 27200b57cec5SDimitry Andric /// \param __b 27210b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 27220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 27230b57cec5SDimitry Andric /// differences of the values in the operands. 27240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27250b57cec5SDimitry Andric _mm_subs_epu16(__m128i __a, __m128i __b) 27260b57cec5SDimitry Andric { 27270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); 27280b57cec5SDimitry Andric } 27290b57cec5SDimitry Andric 27300b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 27310b57cec5SDimitry Andric /// 27320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27330b57cec5SDimitry Andric /// 27340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
27350b57cec5SDimitry Andric /// 27360b57cec5SDimitry Andric /// \param __a 27370b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27380b57cec5SDimitry Andric /// \param __b 27390b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27400b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 27410b57cec5SDimitry Andric /// in both operands. 27420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27430b57cec5SDimitry Andric _mm_and_si128(__m128i __a, __m128i __b) 27440b57cec5SDimitry Andric { 27450b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 27460b57cec5SDimitry Andric } 27470b57cec5SDimitry Andric 27480b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 27490b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 27500b57cec5SDimitry Andric /// 27510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27520b57cec5SDimitry Andric /// 27530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 27540b57cec5SDimitry Andric /// 27550b57cec5SDimitry Andric /// \param __a 27560b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. The one's complement 27570b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 27580b57cec5SDimitry Andric /// \param __b 27590b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 27600b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 27610b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 
27620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27630b57cec5SDimitry Andric _mm_andnot_si128(__m128i __a, __m128i __b) 27640b57cec5SDimitry Andric { 27650b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 27660b57cec5SDimitry Andric } 27670b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 27680b57cec5SDimitry Andric /// 27690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27700b57cec5SDimitry Andric /// 27710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 27720b57cec5SDimitry Andric /// 27730b57cec5SDimitry Andric /// \param __a 27740b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27750b57cec5SDimitry Andric /// \param __b 27760b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27770b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 27780b57cec5SDimitry Andric /// in both operands. 27790b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27800b57cec5SDimitry Andric _mm_or_si128(__m128i __a, __m128i __b) 27810b57cec5SDimitry Andric { 27820b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 27830b57cec5SDimitry Andric } 27840b57cec5SDimitry Andric 27850b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 27860b57cec5SDimitry Andric /// 27870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27880b57cec5SDimitry Andric /// 27890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 27900b57cec5SDimitry Andric /// 27910b57cec5SDimitry Andric /// \param __a 27920b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27930b57cec5SDimitry Andric /// \param __b 27940b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 
27950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 27960b57cec5SDimitry Andric /// values in both operands. 27970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27980b57cec5SDimitry Andric _mm_xor_si128(__m128i __a, __m128i __b) 27990b57cec5SDimitry Andric { 28000b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 28010b57cec5SDimitry Andric } 28020b57cec5SDimitry Andric 28030b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 28040b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 28050b57cec5SDimitry Andric /// 28060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28070b57cec5SDimitry Andric /// 28080b57cec5SDimitry Andric /// \code 28090b57cec5SDimitry Andric /// __m128i _mm_slli_si128(__m128i a, const int imm); 28100b57cec5SDimitry Andric /// \endcode 28110b57cec5SDimitry Andric /// 28120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction. 28130b57cec5SDimitry Andric /// 28140b57cec5SDimitry Andric /// \param a 28150b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28160b57cec5SDimitry Andric /// \param imm 28170b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to left-shift operand 28180b57cec5SDimitry Andric /// \a a. 28190b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted value. 
28200b57cec5SDimitry Andric #define _mm_slli_si128(a, imm) \ 28210b57cec5SDimitry Andric (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) 28220b57cec5SDimitry Andric 28230b57cec5SDimitry Andric #define _mm_bslli_si128(a, imm) \ 28240b57cec5SDimitry Andric (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) 28250b57cec5SDimitry Andric 28260b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 28270b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28280b57cec5SDimitry Andric /// 28290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28300b57cec5SDimitry Andric /// 28310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 28320b57cec5SDimitry Andric /// 28330b57cec5SDimitry Andric /// \param __a 28340b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28350b57cec5SDimitry Andric /// \param __count 28360b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 28370b57cec5SDimitry Andric /// in operand \a __a. 28380b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 28390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28400b57cec5SDimitry Andric _mm_slli_epi16(__m128i __a, int __count) 28410b57cec5SDimitry Andric { 28420b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 28430b57cec5SDimitry Andric } 28440b57cec5SDimitry Andric 28450b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 28460b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 
28470b57cec5SDimitry Andric /// 28480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28490b57cec5SDimitry Andric /// 28500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 28510b57cec5SDimitry Andric /// 28520b57cec5SDimitry Andric /// \param __a 28530b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28540b57cec5SDimitry Andric /// \param __count 28550b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28560b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28570b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 28580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28590b57cec5SDimitry Andric _mm_sll_epi16(__m128i __a, __m128i __count) 28600b57cec5SDimitry Andric { 28610b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 28620b57cec5SDimitry Andric } 28630b57cec5SDimitry Andric 28640b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 28650b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28660b57cec5SDimitry Andric /// 28670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28680b57cec5SDimitry Andric /// 28690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 28700b57cec5SDimitry Andric /// 28710b57cec5SDimitry Andric /// \param __a 28720b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28730b57cec5SDimitry Andric /// \param __count 28740b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 28750b57cec5SDimitry Andric /// in operand \a __a. 28760b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
28770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28780b57cec5SDimitry Andric _mm_slli_epi32(__m128i __a, int __count) 28790b57cec5SDimitry Andric { 28800b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 28810b57cec5SDimitry Andric } 28820b57cec5SDimitry Andric 28830b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 28840b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28850b57cec5SDimitry Andric /// 28860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28870b57cec5SDimitry Andric /// 28880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 28890b57cec5SDimitry Andric /// 28900b57cec5SDimitry Andric /// \param __a 28910b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28920b57cec5SDimitry Andric /// \param __count 28930b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28940b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 28960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28970b57cec5SDimitry Andric _mm_sll_epi32(__m128i __a, __m128i __count) 28980b57cec5SDimitry Andric { 28990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 29000b57cec5SDimitry Andric } 29010b57cec5SDimitry Andric 29020b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 29030b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 29040b57cec5SDimitry Andric /// 29050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29060b57cec5SDimitry Andric /// 29070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 
29080b57cec5SDimitry Andric /// 29090b57cec5SDimitry Andric /// \param __a 29100b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29110b57cec5SDimitry Andric /// \param __count 29120b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 29130b57cec5SDimitry Andric /// in operand \a __a. 29140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 29150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29160b57cec5SDimitry Andric _mm_slli_epi64(__m128i __a, int __count) 29170b57cec5SDimitry Andric { 29180b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 29190b57cec5SDimitry Andric } 29200b57cec5SDimitry Andric 29210b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 29220b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 29230b57cec5SDimitry Andric /// 29240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29250b57cec5SDimitry Andric /// 29260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 29270b57cec5SDimitry Andric /// 29280b57cec5SDimitry Andric /// \param __a 29290b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29300b57cec5SDimitry Andric /// \param __count 29310b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29320b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 29330b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
29340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29350b57cec5SDimitry Andric _mm_sll_epi64(__m128i __a, __m128i __count) 29360b57cec5SDimitry Andric { 29370b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 29380b57cec5SDimitry Andric } 29390b57cec5SDimitry Andric 29400b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 29410b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29420b57cec5SDimitry Andric /// bit of the initial value. 29430b57cec5SDimitry Andric /// 29440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29450b57cec5SDimitry Andric /// 29460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 29470b57cec5SDimitry Andric /// 29480b57cec5SDimitry Andric /// \param __a 29490b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29500b57cec5SDimitry Andric /// \param __count 29510b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29520b57cec5SDimitry Andric /// in operand \a __a. 29530b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29550b57cec5SDimitry Andric _mm_srai_epi16(__m128i __a, int __count) 29560b57cec5SDimitry Andric { 29570b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 29580b57cec5SDimitry Andric } 29590b57cec5SDimitry Andric 29600b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 29610b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29620b57cec5SDimitry Andric /// bit of the initial value. 
29630b57cec5SDimitry Andric /// 29640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29650b57cec5SDimitry Andric /// 29660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 29670b57cec5SDimitry Andric /// 29680b57cec5SDimitry Andric /// \param __a 29690b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29700b57cec5SDimitry Andric /// \param __count 29710b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29720b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29730b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29750b57cec5SDimitry Andric _mm_sra_epi16(__m128i __a, __m128i __count) 29760b57cec5SDimitry Andric { 29770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 29780b57cec5SDimitry Andric } 29790b57cec5SDimitry Andric 29800b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 29810b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29820b57cec5SDimitry Andric /// bit of the initial value. 29830b57cec5SDimitry Andric /// 29840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29850b57cec5SDimitry Andric /// 29860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 29870b57cec5SDimitry Andric /// 29880b57cec5SDimitry Andric /// \param __a 29890b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29900b57cec5SDimitry Andric /// \param __count 29910b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29920b57cec5SDimitry Andric /// in operand \a __a. 
29930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29950b57cec5SDimitry Andric _mm_srai_epi32(__m128i __a, int __count) 29960b57cec5SDimitry Andric { 29970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 29980b57cec5SDimitry Andric } 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 30010b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 30020b57cec5SDimitry Andric /// bit of the initial value. 30030b57cec5SDimitry Andric /// 30040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30050b57cec5SDimitry Andric /// 30060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 30070b57cec5SDimitry Andric /// 30080b57cec5SDimitry Andric /// \param __a 30090b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30100b57cec5SDimitry Andric /// \param __count 30110b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30120b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30130b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30150b57cec5SDimitry Andric _mm_sra_epi32(__m128i __a, __m128i __count) 30160b57cec5SDimitry Andric { 30170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 30180b57cec5SDimitry Andric } 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 30210b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_srli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to right-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))

/* _mm_bsrli_si128 expands to the same builtin call as _mm_srli_si128. Both
 * expansions are enclosed in parentheses so that the (__m128i) cast always
 * applies to the result of the builtin call, whatever operators surround the
 * macro invocation. */
#define _mm_bsrli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))

/// Right-shifts each of the 16-bit values in the 128-bit integer vector
///    operand by the specified number of bits. High-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector containing the source operand.
30520b57cec5SDimitry Andric /// \param __count 30530b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30540b57cec5SDimitry Andric /// in operand \a __a. 30550b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30570b57cec5SDimitry Andric _mm_srli_epi16(__m128i __a, int __count) 30580b57cec5SDimitry Andric { 30590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 30600b57cec5SDimitry Andric } 30610b57cec5SDimitry Andric 30620b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 30630b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30640b57cec5SDimitry Andric /// 30650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30660b57cec5SDimitry Andric /// 30670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 30680b57cec5SDimitry Andric /// 30690b57cec5SDimitry Andric /// \param __a 30700b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30710b57cec5SDimitry Andric /// \param __count 30720b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30730b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30740b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
30750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30760b57cec5SDimitry Andric _mm_srl_epi16(__m128i __a, __m128i __count) 30770b57cec5SDimitry Andric { 30780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 30790b57cec5SDimitry Andric } 30800b57cec5SDimitry Andric 30810b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 30820b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30830b57cec5SDimitry Andric /// 30840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30850b57cec5SDimitry Andric /// 30860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 30870b57cec5SDimitry Andric /// 30880b57cec5SDimitry Andric /// \param __a 30890b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30900b57cec5SDimitry Andric /// \param __count 30910b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30920b57cec5SDimitry Andric /// in operand \a __a. 30930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30950b57cec5SDimitry Andric _mm_srli_epi32(__m128i __a, int __count) 30960b57cec5SDimitry Andric { 30970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 30980b57cec5SDimitry Andric } 30990b57cec5SDimitry Andric 31000b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 31010b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31020b57cec5SDimitry Andric /// 31030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31040b57cec5SDimitry Andric /// 31050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 
31060b57cec5SDimitry Andric /// 31070b57cec5SDimitry Andric /// \param __a 31080b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31090b57cec5SDimitry Andric /// \param __count 31100b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 31110b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 31120b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 31130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31140b57cec5SDimitry Andric _mm_srl_epi32(__m128i __a, __m128i __count) 31150b57cec5SDimitry Andric { 31160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 31170b57cec5SDimitry Andric } 31180b57cec5SDimitry Andric 31190b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 31200b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31210b57cec5SDimitry Andric /// 31220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31230b57cec5SDimitry Andric /// 31240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 31250b57cec5SDimitry Andric /// 31260b57cec5SDimitry Andric /// \param __a 31270b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31280b57cec5SDimitry Andric /// \param __count 31290b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 31300b57cec5SDimitry Andric /// in operand \a __a. 31310b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
31320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31330b57cec5SDimitry Andric _mm_srli_epi64(__m128i __a, int __count) 31340b57cec5SDimitry Andric { 31350b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 31360b57cec5SDimitry Andric } 31370b57cec5SDimitry Andric 31380b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 31390b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31400b57cec5SDimitry Andric /// 31410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31420b57cec5SDimitry Andric /// 31430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 31440b57cec5SDimitry Andric /// 31450b57cec5SDimitry Andric /// \param __a 31460b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31470b57cec5SDimitry Andric /// \param __count 31480b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 31490b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 31500b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 31510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31520b57cec5SDimitry Andric _mm_srl_epi64(__m128i __a, __m128i __count) 31530b57cec5SDimitry Andric { 31540b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 31550b57cec5SDimitry Andric } 31560b57cec5SDimitry Andric 31570b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 31580b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF 31590b57cec5SDimitry Andric /// for true. 
31600b57cec5SDimitry Andric /// 31610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31620b57cec5SDimitry Andric /// 31630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 31640b57cec5SDimitry Andric /// 31650b57cec5SDimitry Andric /// \param __a 31660b57cec5SDimitry Andric /// A 128-bit integer vector. 31670b57cec5SDimitry Andric /// \param __b 31680b57cec5SDimitry Andric /// A 128-bit integer vector. 31690b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 31700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31710b57cec5SDimitry Andric _mm_cmpeq_epi8(__m128i __a, __m128i __b) 31720b57cec5SDimitry Andric { 31730b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 31740b57cec5SDimitry Andric } 31750b57cec5SDimitry Andric 31760b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 31770b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 31780b57cec5SDimitry Andric /// 0xFFFF for true. 31790b57cec5SDimitry Andric /// 31800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31810b57cec5SDimitry Andric /// 31820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 31830b57cec5SDimitry Andric /// 31840b57cec5SDimitry Andric /// \param __a 31850b57cec5SDimitry Andric /// A 128-bit integer vector. 31860b57cec5SDimitry Andric /// \param __b 31870b57cec5SDimitry Andric /// A 128-bit integer vector. 31880b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
31890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31900b57cec5SDimitry Andric _mm_cmpeq_epi16(__m128i __a, __m128i __b) 31910b57cec5SDimitry Andric { 31920b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 31930b57cec5SDimitry Andric } 31940b57cec5SDimitry Andric 31950b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 31960b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 31970b57cec5SDimitry Andric /// 0xFFFFFFFF for true. 31980b57cec5SDimitry Andric /// 31990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32000b57cec5SDimitry Andric /// 32010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 32020b57cec5SDimitry Andric /// 32030b57cec5SDimitry Andric /// \param __a 32040b57cec5SDimitry Andric /// A 128-bit integer vector. 32050b57cec5SDimitry Andric /// \param __b 32060b57cec5SDimitry Andric /// A 128-bit integer vector. 32070b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32090b57cec5SDimitry Andric _mm_cmpeq_epi32(__m128i __a, __m128i __b) 32100b57cec5SDimitry Andric { 32110b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 32120b57cec5SDimitry Andric } 32130b57cec5SDimitry Andric 32140b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 32150b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 32160b57cec5SDimitry Andric /// greater than those in the second operand. Each comparison yields 0x0 for 32170b57cec5SDimitry Andric /// false, 0xFF for true. 
32180b57cec5SDimitry Andric /// 32190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32200b57cec5SDimitry Andric /// 32210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 32220b57cec5SDimitry Andric /// 32230b57cec5SDimitry Andric /// \param __a 32240b57cec5SDimitry Andric /// A 128-bit integer vector. 32250b57cec5SDimitry Andric /// \param __b 32260b57cec5SDimitry Andric /// A 128-bit integer vector. 32270b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32290b57cec5SDimitry Andric _mm_cmpgt_epi8(__m128i __a, __m128i __b) 32300b57cec5SDimitry Andric { 32310b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 32320b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. */ 32330b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 32340b57cec5SDimitry Andric } 32350b57cec5SDimitry Andric 32360b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 32370b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32380b57cec5SDimitry Andric /// are greater than those in the second operand. 32390b57cec5SDimitry Andric /// 32400b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 32410b57cec5SDimitry Andric /// 32420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32430b57cec5SDimitry Andric /// 32440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 32450b57cec5SDimitry Andric /// 32460b57cec5SDimitry Andric /// \param __a 32470b57cec5SDimitry Andric /// A 128-bit integer vector. 32480b57cec5SDimitry Andric /// \param __b 32490b57cec5SDimitry Andric /// A 128-bit integer vector. 
32500b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32520b57cec5SDimitry Andric _mm_cmpgt_epi16(__m128i __a, __m128i __b) 32530b57cec5SDimitry Andric { 32540b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 32550b57cec5SDimitry Andric } 32560b57cec5SDimitry Andric 32570b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 32580b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32590b57cec5SDimitry Andric /// are greater than those in the second operand. 32600b57cec5SDimitry Andric /// 32610b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 32620b57cec5SDimitry Andric /// 32630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32640b57cec5SDimitry Andric /// 32650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 32660b57cec5SDimitry Andric /// 32670b57cec5SDimitry Andric /// \param __a 32680b57cec5SDimitry Andric /// A 128-bit integer vector. 32690b57cec5SDimitry Andric /// \param __b 32700b57cec5SDimitry Andric /// A 128-bit integer vector. 32710b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32730b57cec5SDimitry Andric _mm_cmpgt_epi32(__m128i __a, __m128i __b) 32740b57cec5SDimitry Andric { 32750b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 32760b57cec5SDimitry Andric } 32770b57cec5SDimitry Andric 32780b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 32790b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 32800b57cec5SDimitry Andric /// than those in the second operand. 
32810b57cec5SDimitry Andric /// 32820b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFF for true. 32830b57cec5SDimitry Andric /// 32840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32850b57cec5SDimitry Andric /// 32860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 32870b57cec5SDimitry Andric /// 32880b57cec5SDimitry Andric /// \param __a 32890b57cec5SDimitry Andric /// A 128-bit integer vector. 32900b57cec5SDimitry Andric /// \param __b 32910b57cec5SDimitry Andric /// A 128-bit integer vector. 32920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32940b57cec5SDimitry Andric _mm_cmplt_epi8(__m128i __a, __m128i __b) 32950b57cec5SDimitry Andric { 32960b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 32970b57cec5SDimitry Andric } 32980b57cec5SDimitry Andric 32990b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 33000b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 33010b57cec5SDimitry Andric /// are less than those in the second operand. 33020b57cec5SDimitry Andric /// 33030b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 33040b57cec5SDimitry Andric /// 33050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33060b57cec5SDimitry Andric /// 33070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 33080b57cec5SDimitry Andric /// 33090b57cec5SDimitry Andric /// \param __a 33100b57cec5SDimitry Andric /// A 128-bit integer vector. 33110b57cec5SDimitry Andric /// \param __b 33120b57cec5SDimitry Andric /// A 128-bit integer vector. 33130b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
33140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 33150b57cec5SDimitry Andric _mm_cmplt_epi16(__m128i __a, __m128i __b) 33160b57cec5SDimitry Andric { 33170b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 33180b57cec5SDimitry Andric } 33190b57cec5SDimitry Andric 33200b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 33210b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 33220b57cec5SDimitry Andric /// are less than those in the second operand. 33230b57cec5SDimitry Andric /// 33240b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 33250b57cec5SDimitry Andric /// 33260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33270b57cec5SDimitry Andric /// 33280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 33290b57cec5SDimitry Andric /// 33300b57cec5SDimitry Andric /// \param __a 33310b57cec5SDimitry Andric /// A 128-bit integer vector. 33320b57cec5SDimitry Andric /// \param __b 33330b57cec5SDimitry Andric /// A 128-bit integer vector. 33340b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 33350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 33360b57cec5SDimitry Andric _mm_cmplt_epi32(__m128i __a, __m128i __b) 33370b57cec5SDimitry Andric { 33380b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 33390b57cec5SDimitry Andric } 33400b57cec5SDimitry Andric 33410b57cec5SDimitry Andric #ifdef __x86_64__ 33420b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 33430b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 33440b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 33450b57cec5SDimitry Andric /// the upper element of the first operand. 
33460b57cec5SDimitry Andric /// 33470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33480b57cec5SDimitry Andric /// 33490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 33500b57cec5SDimitry Andric /// 33510b57cec5SDimitry Andric /// \param __a 33520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 33530b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 33540b57cec5SDimitry Andric /// \param __b 33550b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 33560b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 33570b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 33580b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 33590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 33600b57cec5SDimitry Andric _mm_cvtsi64_sd(__m128d __a, long long __b) 33610b57cec5SDimitry Andric { 33620b57cec5SDimitry Andric __a[0] = __b; 33630b57cec5SDimitry Andric return __a; 33640b57cec5SDimitry Andric } 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 33670b57cec5SDimitry Andric /// 64-bit signed integer value, according to the current rounding mode. 33680b57cec5SDimitry Andric /// 33690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33700b57cec5SDimitry Andric /// 33710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 33720b57cec5SDimitry Andric /// 33730b57cec5SDimitry Andric /// \param __a 33740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 33750b57cec5SDimitry Andric /// conversion. 33760b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 
33770b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 33780b57cec5SDimitry Andric _mm_cvtsd_si64(__m128d __a) 33790b57cec5SDimitry Andric { 33800b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 33810b57cec5SDimitry Andric } 33820b57cec5SDimitry Andric 33830b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 33840b57cec5SDimitry Andric /// 64-bit signed integer value, truncating the result when it is inexact. 33850b57cec5SDimitry Andric /// 33860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33870b57cec5SDimitry Andric /// 33880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 33890b57cec5SDimitry Andric /// instruction. 33900b57cec5SDimitry Andric /// 33910b57cec5SDimitry Andric /// \param __a 33920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 33930b57cec5SDimitry Andric /// conversion. 33940b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 33950b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 33960b57cec5SDimitry Andric _mm_cvttsd_si64(__m128d __a) 33970b57cec5SDimitry Andric { 33980b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 33990b57cec5SDimitry Andric } 34000b57cec5SDimitry Andric #endif 34010b57cec5SDimitry Andric 34020b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 34030b57cec5SDimitry Andric /// 34040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34050b57cec5SDimitry Andric /// 34060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 34070b57cec5SDimitry Andric /// 34080b57cec5SDimitry Andric /// \param __a 34090b57cec5SDimitry Andric /// A 128-bit integer vector. 34100b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 
34110b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 34120b57cec5SDimitry Andric _mm_cvtepi32_ps(__m128i __a) 34130b57cec5SDimitry Andric { 34140b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4si)__a, __v4sf); 34150b57cec5SDimitry Andric } 34160b57cec5SDimitry Andric 34170b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 34180b57cec5SDimitry Andric /// 34190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34200b57cec5SDimitry Andric /// 34210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 34220b57cec5SDimitry Andric /// 34230b57cec5SDimitry Andric /// \param __a 34240b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 34250b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 34260b57cec5SDimitry Andric /// values. 34270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34280b57cec5SDimitry Andric _mm_cvtps_epi32(__m128 __a) 34290b57cec5SDimitry Andric { 34300b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 34310b57cec5SDimitry Andric } 34320b57cec5SDimitry Andric 34330b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32], 34340b57cec5SDimitry Andric /// truncating the result when it is inexact. 34350b57cec5SDimitry Andric /// 34360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34370b57cec5SDimitry Andric /// 34380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 34390b57cec5SDimitry Andric /// instruction. 34400b57cec5SDimitry Andric /// 34410b57cec5SDimitry Andric /// \param __a 34420b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 34430b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 
34440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34450b57cec5SDimitry Andric _mm_cvttps_epi32(__m128 __a) 34460b57cec5SDimitry Andric { 34470b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 34480b57cec5SDimitry Andric } 34490b57cec5SDimitry Andric 34500b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 34510b57cec5SDimitry Andric /// operand and the remaining elements are zero. 34520b57cec5SDimitry Andric /// 34530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34540b57cec5SDimitry Andric /// 34550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 34560b57cec5SDimitry Andric /// 34570b57cec5SDimitry Andric /// \param __a 34580b57cec5SDimitry Andric /// A 32-bit signed integer operand. 34590b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 34600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34610b57cec5SDimitry Andric _mm_cvtsi32_si128(int __a) 34620b57cec5SDimitry Andric { 34630b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 }; 34640b57cec5SDimitry Andric } 34650b57cec5SDimitry Andric 34660b57cec5SDimitry Andric #ifdef __x86_64__ 34670b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 34680b57cec5SDimitry Andric /// operand and the upper element is zero. 34690b57cec5SDimitry Andric /// 34700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34710b57cec5SDimitry Andric /// 34720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 34730b57cec5SDimitry Andric /// 34740b57cec5SDimitry Andric /// \param __a 34750b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 34760b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 
34770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34780b57cec5SDimitry Andric _mm_cvtsi64_si128(long long __a) 34790b57cec5SDimitry Andric { 34800b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __a, 0 }; 34810b57cec5SDimitry Andric } 34820b57cec5SDimitry Andric #endif 34830b57cec5SDimitry Andric 34840b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 34850b57cec5SDimitry Andric /// 32-bit signed integer value. 34860b57cec5SDimitry Andric /// 34870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34880b57cec5SDimitry Andric /// 34890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 34900b57cec5SDimitry Andric /// 34910b57cec5SDimitry Andric /// \param __a 34920b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 34930b57cec5SDimitry Andric /// destination. 34940b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 34950b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 34960b57cec5SDimitry Andric _mm_cvtsi128_si32(__m128i __a) 34970b57cec5SDimitry Andric { 34980b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 34990b57cec5SDimitry Andric return __b[0]; 35000b57cec5SDimitry Andric } 35010b57cec5SDimitry Andric 35020b57cec5SDimitry Andric #ifdef __x86_64__ 35030b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 35040b57cec5SDimitry Andric /// 64-bit signed integer value. 35050b57cec5SDimitry Andric /// 35060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35070b57cec5SDimitry Andric /// 35080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 35090b57cec5SDimitry Andric /// 35100b57cec5SDimitry Andric /// \param __a 35110b57cec5SDimitry Andric /// A vector of [2 x i64]. The least significant 64 bits are moved to the 35120b57cec5SDimitry Andric /// destination. 
35130b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 35140b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 35150b57cec5SDimitry Andric _mm_cvtsi128_si64(__m128i __a) 35160b57cec5SDimitry Andric { 35170b57cec5SDimitry Andric return __a[0]; 35180b57cec5SDimitry Andric } 35190b57cec5SDimitry Andric #endif 35200b57cec5SDimitry Andric 35210b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 35220b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 35230b57cec5SDimitry Andric /// 35240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35250b57cec5SDimitry Andric /// 35260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 35270b57cec5SDimitry Andric /// 35280b57cec5SDimitry Andric /// \param __p 35290b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 35300b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 35310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35320b57cec5SDimitry Andric _mm_load_si128(__m128i const *__p) 35330b57cec5SDimitry Andric { 35340b57cec5SDimitry Andric return *__p; 35350b57cec5SDimitry Andric } 35360b57cec5SDimitry Andric 35370b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 35380b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 35390b57cec5SDimitry Andric /// 35400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35410b57cec5SDimitry Andric /// 35420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 35430b57cec5SDimitry Andric /// 35440b57cec5SDimitry Andric /// \param __p 35450b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 35460b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 
35470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35480b57cec5SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) 35490b57cec5SDimitry Andric { 35500b57cec5SDimitry Andric struct __loadu_si128 { 35510b57cec5SDimitry Andric __m128i_u __v; 35520b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3553480093f4SDimitry Andric return ((const struct __loadu_si128*)__p)->__v; 35540b57cec5SDimitry Andric } 35550b57cec5SDimitry Andric 35560b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 35570b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 35580b57cec5SDimitry Andric /// 35590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35600b57cec5SDimitry Andric /// 35610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 35620b57cec5SDimitry Andric /// 35630b57cec5SDimitry Andric /// \param __p 35640b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 35650b57cec5SDimitry Andric /// the destination. 35660b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 35670b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 35680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35690b57cec5SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) 35700b57cec5SDimitry Andric { 35710b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 35720b57cec5SDimitry Andric long long __u; 35730b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3574480093f4SDimitry Andric return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0}; 35750b57cec5SDimitry Andric } 35760b57cec5SDimitry Andric 35770b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 
35780b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 35790b57cec5SDimitry Andric /// argument is required but the value is not actually used. 35800b57cec5SDimitry Andric /// 35810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35820b57cec5SDimitry Andric /// 35830b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 35840b57cec5SDimitry Andric /// 35850b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 35860b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35870b57cec5SDimitry Andric _mm_undefined_si128(void) 35880b57cec5SDimitry Andric { 35890b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 35900b57cec5SDimitry Andric } 35910b57cec5SDimitry Andric 35920b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 35930b57cec5SDimitry Andric /// the specified 64-bit integer values. 35940b57cec5SDimitry Andric /// 35950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35960b57cec5SDimitry Andric /// 35970b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35980b57cec5SDimitry Andric /// instruction. 35990b57cec5SDimitry Andric /// 36000b57cec5SDimitry Andric /// \param __q1 36010b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 36020b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36030b57cec5SDimitry Andric /// \param __q0 36040b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 36050b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36060b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 36070b57cec5SDimitry Andric /// provided in the operands. 
36080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36090b57cec5SDimitry Andric _mm_set_epi64x(long long __q1, long long __q0) 36100b57cec5SDimitry Andric { 36110b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 36120b57cec5SDimitry Andric } 36130b57cec5SDimitry Andric 36140b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 36150b57cec5SDimitry Andric /// the specified 64-bit integer values. 36160b57cec5SDimitry Andric /// 36170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36180b57cec5SDimitry Andric /// 36190b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36200b57cec5SDimitry Andric /// instruction. 36210b57cec5SDimitry Andric /// 36220b57cec5SDimitry Andric /// \param __q1 36230b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 36240b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36250b57cec5SDimitry Andric /// \param __q0 36260b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 36270b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36280b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 36290b57cec5SDimitry Andric /// provided in the operands. 36300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36310b57cec5SDimitry Andric _mm_set_epi64(__m64 __q1, __m64 __q0) 36320b57cec5SDimitry Andric { 36330b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 36340b57cec5SDimitry Andric } 36350b57cec5SDimitry Andric 36360b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 36370b57cec5SDimitry Andric /// the specified 32-bit integer values. 
36380b57cec5SDimitry Andric /// 36390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36400b57cec5SDimitry Andric /// 36410b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36420b57cec5SDimitry Andric /// instruction. 36430b57cec5SDimitry Andric /// 36440b57cec5SDimitry Andric /// \param __i3 36450b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 36460b57cec5SDimitry Andric /// destination vector. 36470b57cec5SDimitry Andric /// \param __i2 36480b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 36490b57cec5SDimitry Andric /// vector. 36500b57cec5SDimitry Andric /// \param __i1 36510b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 36520b57cec5SDimitry Andric /// vector. 36530b57cec5SDimitry Andric /// \param __i0 36540b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 36550b57cec5SDimitry Andric /// vector. 36560b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 36570b57cec5SDimitry Andric /// provided in the operands. 36580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36590b57cec5SDimitry Andric _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) 36600b57cec5SDimitry Andric { 36610b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; 36620b57cec5SDimitry Andric } 36630b57cec5SDimitry Andric 36640b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 36650b57cec5SDimitry Andric /// the specified 16-bit integer values. 
36660b57cec5SDimitry Andric /// 36670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36680b57cec5SDimitry Andric /// 36690b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36700b57cec5SDimitry Andric /// instruction. 36710b57cec5SDimitry Andric /// 36720b57cec5SDimitry Andric /// \param __w7 36730b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [127:112] of the 36740b57cec5SDimitry Andric /// destination vector. 36750b57cec5SDimitry Andric /// \param __w6 36760b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [111:96] of the 36770b57cec5SDimitry Andric /// destination vector. 36780b57cec5SDimitry Andric /// \param __w5 36790b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [95:80] of the destination 36800b57cec5SDimitry Andric /// vector. 36810b57cec5SDimitry Andric /// \param __w4 36820b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [79:64] of the destination 36830b57cec5SDimitry Andric /// vector. 36840b57cec5SDimitry Andric /// \param __w3 36850b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the destination 36860b57cec5SDimitry Andric /// vector. 36870b57cec5SDimitry Andric /// \param __w2 36880b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the destination 36890b57cec5SDimitry Andric /// vector. 36900b57cec5SDimitry Andric /// \param __w1 36910b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the destination 36920b57cec5SDimitry Andric /// vector. 36930b57cec5SDimitry Andric /// \param __w0 36940b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the destination 36950b57cec5SDimitry Andric /// vector. 36960b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values 36970b57cec5SDimitry Andric /// provided in the operands. 
36980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36990b57cec5SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) 37000b57cec5SDimitry Andric { 37010b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; 37020b57cec5SDimitry Andric } 37030b57cec5SDimitry Andric 37040b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 37050b57cec5SDimitry Andric /// the specified 8-bit integer values. 37060b57cec5SDimitry Andric /// 37070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37080b57cec5SDimitry Andric /// 37090b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37100b57cec5SDimitry Andric /// instruction. 37110b57cec5SDimitry Andric /// 37120b57cec5SDimitry Andric /// \param __b15 37130b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 37140b57cec5SDimitry Andric /// \param __b14 37150b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 37160b57cec5SDimitry Andric /// \param __b13 37170b57cec5SDimitry Andric /// Initializes bits [111:104] of the destination vector. 37180b57cec5SDimitry Andric /// \param __b12 37190b57cec5SDimitry Andric /// Initializes bits [103:96] of the destination vector. 37200b57cec5SDimitry Andric /// \param __b11 37210b57cec5SDimitry Andric /// Initializes bits [95:88] of the destination vector. 37220b57cec5SDimitry Andric /// \param __b10 37230b57cec5SDimitry Andric /// Initializes bits [87:80] of the destination vector. 37240b57cec5SDimitry Andric /// \param __b9 37250b57cec5SDimitry Andric /// Initializes bits [79:72] of the destination vector. 37260b57cec5SDimitry Andric /// \param __b8 37270b57cec5SDimitry Andric /// Initializes bits [71:64] of the destination vector. 
37280b57cec5SDimitry Andric /// \param __b7 37290b57cec5SDimitry Andric /// Initializes bits [63:56] of the destination vector. 37300b57cec5SDimitry Andric /// \param __b6 37310b57cec5SDimitry Andric /// Initializes bits [55:48] of the destination vector. 37320b57cec5SDimitry Andric /// \param __b5 37330b57cec5SDimitry Andric /// Initializes bits [47:40] of the destination vector. 37340b57cec5SDimitry Andric /// \param __b4 37350b57cec5SDimitry Andric /// Initializes bits [39:32] of the destination vector. 37360b57cec5SDimitry Andric /// \param __b3 37370b57cec5SDimitry Andric /// Initializes bits [31:24] of the destination vector. 37380b57cec5SDimitry Andric /// \param __b2 37390b57cec5SDimitry Andric /// Initializes bits [23:16] of the destination vector. 37400b57cec5SDimitry Andric /// \param __b1 37410b57cec5SDimitry Andric /// Initializes bits [15:8] of the destination vector. 37420b57cec5SDimitry Andric /// \param __b0 37430b57cec5SDimitry Andric /// Initializes bits [7:0] of the destination vector. 37440b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values 37450b57cec5SDimitry Andric /// provided in the operands. 37460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37470b57cec5SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) 37480b57cec5SDimitry Andric { 37490b57cec5SDimitry Andric return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; 37500b57cec5SDimitry Andric } 37510b57cec5SDimitry Andric 37520b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 37530b57cec5SDimitry Andric /// specified 64-bit integer value. 
37540b57cec5SDimitry Andric /// 37550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37560b57cec5SDimitry Andric /// 37570b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37580b57cec5SDimitry Andric /// instruction. 37590b57cec5SDimitry Andric /// 37600b57cec5SDimitry Andric /// \param __q 37610b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 37620b57cec5SDimitry Andric /// vector. 37630b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 37640b57cec5SDimitry Andric /// elements containing the value provided in the operand. 37650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37660b57cec5SDimitry Andric _mm_set1_epi64x(long long __q) 37670b57cec5SDimitry Andric { 37680b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 37690b57cec5SDimitry Andric } 37700b57cec5SDimitry Andric 37710b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 37720b57cec5SDimitry Andric /// specified 64-bit value. 37730b57cec5SDimitry Andric /// 37740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37750b57cec5SDimitry Andric /// 37760b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37770b57cec5SDimitry Andric /// instruction. 37780b57cec5SDimitry Andric /// 37790b57cec5SDimitry Andric /// \param __q 37800b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 37810b57cec5SDimitry Andric /// vector. 37820b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 37830b57cec5SDimitry Andric /// containing the value provided in the operand. 
37840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37850b57cec5SDimitry Andric _mm_set1_epi64(__m64 __q) 37860b57cec5SDimitry Andric { 37870b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 37880b57cec5SDimitry Andric } 37890b57cec5SDimitry Andric 37900b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 37910b57cec5SDimitry Andric /// specified 32-bit value. 37920b57cec5SDimitry Andric /// 37930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37940b57cec5SDimitry Andric /// 37950b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37960b57cec5SDimitry Andric /// instruction. 37970b57cec5SDimitry Andric /// 37980b57cec5SDimitry Andric /// \param __i 37990b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 38000b57cec5SDimitry Andric /// vector. 38010b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 38020b57cec5SDimitry Andric /// containing the value provided in the operand. 38030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38040b57cec5SDimitry Andric _mm_set1_epi32(int __i) 38050b57cec5SDimitry Andric { 38060b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 38070b57cec5SDimitry Andric } 38080b57cec5SDimitry Andric 38090b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 38100b57cec5SDimitry Andric /// specified 16-bit value. 38110b57cec5SDimitry Andric /// 38120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38130b57cec5SDimitry Andric /// 38140b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38150b57cec5SDimitry Andric /// instruction. 
38160b57cec5SDimitry Andric /// 38170b57cec5SDimitry Andric /// \param __w 38180b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 38190b57cec5SDimitry Andric /// vector. 38200b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 38210b57cec5SDimitry Andric /// containing the value provided in the operand. 38220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38230b57cec5SDimitry Andric _mm_set1_epi16(short __w) 38240b57cec5SDimitry Andric { 38250b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 38260b57cec5SDimitry Andric } 38270b57cec5SDimitry Andric 38280b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 38290b57cec5SDimitry Andric /// specified 8-bit value. 38300b57cec5SDimitry Andric /// 38310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38320b57cec5SDimitry Andric /// 38330b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38340b57cec5SDimitry Andric /// instruction. 38350b57cec5SDimitry Andric /// 38360b57cec5SDimitry Andric /// \param __b 38370b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 38380b57cec5SDimitry Andric /// vector. 38390b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 38400b57cec5SDimitry Andric /// containing the value provided in the operand. 
38410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38420b57cec5SDimitry Andric _mm_set1_epi8(char __b) 38430b57cec5SDimitry Andric { 38440b57cec5SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); 38450b57cec5SDimitry Andric } 38460b57cec5SDimitry Andric 38470b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38480b57cec5SDimitry Andric /// with the specified 64-bit integral values. 38490b57cec5SDimitry Andric /// 38500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38510b57cec5SDimitry Andric /// 38520b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 38530b57cec5SDimitry Andric /// 38540b57cec5SDimitry Andric /// \param __q0 38550b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 38560b57cec5SDimitry Andric /// result. 38570b57cec5SDimitry Andric /// \param __q1 38580b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 38590b57cec5SDimitry Andric /// result. 38600b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 38610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38620b57cec5SDimitry Andric _mm_setr_epi64(__m64 __q0, __m64 __q1) 38630b57cec5SDimitry Andric { 38640b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 38650b57cec5SDimitry Andric } 38660b57cec5SDimitry Andric 38670b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38680b57cec5SDimitry Andric /// with the specified 32-bit integral values. 38690b57cec5SDimitry Andric /// 38700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38710b57cec5SDimitry Andric /// 38720b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38730b57cec5SDimitry Andric /// instruction. 
38740b57cec5SDimitry Andric /// 38750b57cec5SDimitry Andric /// \param __i0 38760b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 38770b57cec5SDimitry Andric /// \param __i1 38780b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 38790b57cec5SDimitry Andric /// \param __i2 38800b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 38810b57cec5SDimitry Andric /// \param __i3 38820b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 38830b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 38840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38850b57cec5SDimitry Andric _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) 38860b57cec5SDimitry Andric { 38870b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 38880b57cec5SDimitry Andric } 38890b57cec5SDimitry Andric 38900b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38910b57cec5SDimitry Andric /// with the specified 16-bit integral values. 38920b57cec5SDimitry Andric /// 38930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38940b57cec5SDimitry Andric /// 38950b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38960b57cec5SDimitry Andric /// instruction. 38970b57cec5SDimitry Andric /// 38980b57cec5SDimitry Andric /// \param __w0 38990b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 39000b57cec5SDimitry Andric /// \param __w1 39010b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 39020b57cec5SDimitry Andric /// \param __w2 39030b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 
39040b57cec5SDimitry Andric /// \param __w3 39050b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 39060b57cec5SDimitry Andric /// \param __w4 39070b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 39080b57cec5SDimitry Andric /// \param __w5 39090b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 39100b57cec5SDimitry Andric /// \param __w6 39110b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 39120b57cec5SDimitry Andric /// \param __w7 39130b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 39140b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 39150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39160b57cec5SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) 39170b57cec5SDimitry Andric { 39180b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 39190b57cec5SDimitry Andric } 39200b57cec5SDimitry Andric 39210b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 39220b57cec5SDimitry Andric /// with the specified 8-bit integral values. 39230b57cec5SDimitry Andric /// 39240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39250b57cec5SDimitry Andric /// 39260b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 39270b57cec5SDimitry Andric /// instruction. 39280b57cec5SDimitry Andric /// 39290b57cec5SDimitry Andric /// \param __b0 39300b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [7:0] of the result. 39310b57cec5SDimitry Andric /// \param __b1 39320b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [15:8] of the result. 
39330b57cec5SDimitry Andric /// \param __b2 39340b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [23:16] of the result. 39350b57cec5SDimitry Andric /// \param __b3 39360b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [31:24] of the result. 39370b57cec5SDimitry Andric /// \param __b4 39380b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [39:32] of the result. 39390b57cec5SDimitry Andric /// \param __b5 39400b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [47:40] of the result. 39410b57cec5SDimitry Andric /// \param __b6 39420b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [55:48] of the result. 39430b57cec5SDimitry Andric /// \param __b7 39440b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [63:56] of the result. 39450b57cec5SDimitry Andric /// \param __b8 39460b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [71:64] of the result. 39470b57cec5SDimitry Andric /// \param __b9 39480b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [79:72] of the result. 39490b57cec5SDimitry Andric /// \param __b10 39500b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [87:80] of the result. 39510b57cec5SDimitry Andric /// \param __b11 39520b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [95:88] of the result. 39530b57cec5SDimitry Andric /// \param __b12 39540b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [103:96] of the result. 39550b57cec5SDimitry Andric /// \param __b13 39560b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [111:104] of the result. 39570b57cec5SDimitry Andric /// \param __b14 39580b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [119:112] of the result. 
39590b57cec5SDimitry Andric /// \param __b15 39600b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 39610b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 39620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39630b57cec5SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) 39640b57cec5SDimitry Andric { 39650b57cec5SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 39660b57cec5SDimitry Andric } 39670b57cec5SDimitry Andric 39680b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 39690b57cec5SDimitry Andric /// 39700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39710b57cec5SDimitry Andric /// 39720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 39730b57cec5SDimitry Andric /// 39740b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 39750b57cec5SDimitry Andric /// zero. 39760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39770b57cec5SDimitry Andric _mm_setzero_si128(void) 39780b57cec5SDimitry Andric { 39790b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ 0LL, 0LL }; 39800b57cec5SDimitry Andric } 39810b57cec5SDimitry Andric 39820b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 39830b57cec5SDimitry Andric /// 128-bit boundary. 39840b57cec5SDimitry Andric /// 39850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39860b57cec5SDimitry Andric /// 39870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 
39880b57cec5SDimitry Andric /// 39890b57cec5SDimitry Andric /// \param __p 39900b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 39910b57cec5SDimitry Andric /// values. 39920b57cec5SDimitry Andric /// \param __b 39930b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 39940b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 39950b57cec5SDimitry Andric _mm_store_si128(__m128i *__p, __m128i __b) 39960b57cec5SDimitry Andric { 39970b57cec5SDimitry Andric *__p = __b; 39980b57cec5SDimitry Andric } 39990b57cec5SDimitry Andric 40000b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 40010b57cec5SDimitry Andric /// 40020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40030b57cec5SDimitry Andric /// 40040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 40050b57cec5SDimitry Andric /// 40060b57cec5SDimitry Andric /// \param __p 40070b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 40080b57cec5SDimitry Andric /// \param __b 40090b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 40100b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40110b57cec5SDimitry Andric _mm_storeu_si128(__m128i_u *__p, __m128i __b) 40120b57cec5SDimitry Andric { 40130b57cec5SDimitry Andric struct __storeu_si128 { 40140b57cec5SDimitry Andric __m128i_u __v; 40150b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40160b57cec5SDimitry Andric ((struct __storeu_si128*)__p)->__v = __b; 40170b57cec5SDimitry Andric } 40180b57cec5SDimitry Andric 40190b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 40200b57cec5SDimitry Andric /// vector. 
40210b57cec5SDimitry Andric /// 40220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40230b57cec5SDimitry Andric /// 40240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 40250b57cec5SDimitry Andric /// 40260b57cec5SDimitry Andric /// \param __p 40270b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 40280b57cec5SDimitry Andric /// location does not have to be algned. 40290b57cec5SDimitry Andric /// \param __b 40300b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 40310b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40320b57cec5SDimitry Andric _mm_storeu_si64(void *__p, __m128i __b) 40330b57cec5SDimitry Andric { 40340b57cec5SDimitry Andric struct __storeu_si64 { 40350b57cec5SDimitry Andric long long __v; 40360b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40370b57cec5SDimitry Andric ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0]; 40380b57cec5SDimitry Andric } 40390b57cec5SDimitry Andric 40400b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 40410b57cec5SDimitry Andric /// vector. 40420b57cec5SDimitry Andric /// 40430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40440b57cec5SDimitry Andric /// 40450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 40460b57cec5SDimitry Andric /// 40470b57cec5SDimitry Andric /// \param __p 40480b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 40490b57cec5SDimitry Andric /// location does not have to be aligned. 40500b57cec5SDimitry Andric /// \param __b 40510b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
40520b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40530b57cec5SDimitry Andric _mm_storeu_si32(void *__p, __m128i __b) 40540b57cec5SDimitry Andric { 40550b57cec5SDimitry Andric struct __storeu_si32 { 40560b57cec5SDimitry Andric int __v; 40570b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40580b57cec5SDimitry Andric ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0]; 40590b57cec5SDimitry Andric } 40600b57cec5SDimitry Andric 40610b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 40620b57cec5SDimitry Andric /// vector. 40630b57cec5SDimitry Andric /// 40640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40650b57cec5SDimitry Andric /// 40660b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 40670b57cec5SDimitry Andric /// 40680b57cec5SDimitry Andric /// \param __p 40690b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 40700b57cec5SDimitry Andric /// location does not have to be aligned. 40710b57cec5SDimitry Andric /// \param __b 40720b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 40730b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40740b57cec5SDimitry Andric _mm_storeu_si16(void *__p, __m128i __b) 40750b57cec5SDimitry Andric { 40760b57cec5SDimitry Andric struct __storeu_si16 { 40770b57cec5SDimitry Andric short __v; 40780b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40790b57cec5SDimitry Andric ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0]; 40800b57cec5SDimitry Andric } 40810b57cec5SDimitry Andric 40820b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 40830b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 40840b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 
40850b57cec5SDimitry Andric /// 40860b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40870b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 40880b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 40890b57cec5SDimitry Andric /// 40900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40910b57cec5SDimitry Andric /// 40920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 40930b57cec5SDimitry Andric /// instruction. 40940b57cec5SDimitry Andric /// 40950b57cec5SDimitry Andric /// \param __d 40960b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 40970b57cec5SDimitry Andric /// \param __n 40980b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 40990b57cec5SDimitry Andric /// each byte represents the mask bits. 41000b57cec5SDimitry Andric /// \param __p 41010b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 41020b57cec5SDimitry Andric /// values are moved. 41030b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41040b57cec5SDimitry Andric _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) 41050b57cec5SDimitry Andric { 41060b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 41070b57cec5SDimitry Andric } 41080b57cec5SDimitry Andric 41090b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 41100b57cec5SDimitry Andric /// a memory location. 41110b57cec5SDimitry Andric /// 41120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41130b57cec5SDimitry Andric /// 41140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 
41150b57cec5SDimitry Andric /// 41160b57cec5SDimitry Andric /// \param __p 41170b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 41180b57cec5SDimitry Andric /// of the integer vector parameter. 41190b57cec5SDimitry Andric /// \param __a 41200b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 41210b57cec5SDimitry Andric /// value to be stored. 41220b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41230b57cec5SDimitry Andric _mm_storel_epi64(__m128i_u *__p, __m128i __a) 41240b57cec5SDimitry Andric { 41250b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 41260b57cec5SDimitry Andric long long __u; 41270b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 41280b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; 41290b57cec5SDimitry Andric } 41300b57cec5SDimitry Andric 41310b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 41320b57cec5SDimitry Andric /// aligned memory location. 41330b57cec5SDimitry Andric /// 41340b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41350b57cec5SDimitry Andric /// used again soon). 41360b57cec5SDimitry Andric /// 41370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41380b57cec5SDimitry Andric /// 41390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 41400b57cec5SDimitry Andric /// 41410b57cec5SDimitry Andric /// \param __p 41420b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 41430b57cec5SDimitry Andric /// \param __a 41440b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 
41450b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41460b57cec5SDimitry Andric _mm_stream_pd(double *__p, __m128d __a) 41470b57cec5SDimitry Andric { 41480b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p); 41490b57cec5SDimitry Andric } 41500b57cec5SDimitry Andric 41510b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 41520b57cec5SDimitry Andric /// 41530b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41540b57cec5SDimitry Andric /// used again soon). 41550b57cec5SDimitry Andric /// 41560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41570b57cec5SDimitry Andric /// 41580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 41590b57cec5SDimitry Andric /// 41600b57cec5SDimitry Andric /// \param __p 41610b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 41620b57cec5SDimitry Andric /// \param __a 41630b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 41640b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41650b57cec5SDimitry Andric _mm_stream_si128(__m128i *__p, __m128i __a) 41660b57cec5SDimitry Andric { 41670b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p); 41680b57cec5SDimitry Andric } 41690b57cec5SDimitry Andric 41700b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 41710b57cec5SDimitry Andric /// 41720b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41730b57cec5SDimitry Andric /// used again soon). 41740b57cec5SDimitry Andric /// 41750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41760b57cec5SDimitry Andric /// 41770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 
///
/// \param __p
///    A pointer to the 32-bit memory location used to store the value.
/// \param __a
///    A 32-bit integer containing the value to be stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a)
{
  __builtin_ia32_movnti(__p, __a);
}

#ifdef __x86_64__
/// Stores a 64-bit integer value in the specified memory location.
///
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
///
/// \param __p
///    A pointer to the 64-bit memory location used to store the value.
/// \param __a
///    A 64-bit integer containing the value to be stored.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a)
{
  __builtin_ia32_movnti64(__p, __a);
}
#endif

/* The following three intrinsics are only declared here (no inline body),
 * with C linkage when this header is compiled as C++. */
#if defined(__cplusplus)
extern "C" {
#endif

/// The cache line containing \a __p is flushed and invalidated from all
///    caches in the coherency domain.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
///
/// \param __p
///    A pointer to the memory location used to identify the cache line to be
///    flushed.
void _mm_clflush(void const * __p);

/// Forces strong memory ordering (serialization) between load
///    instructions preceding this instruction and load instructions following
///    this instruction, ensuring the system completes all previous loads before
///    executing subsequent loads.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
///
void _mm_lfence(void);

/// Forces strong memory ordering (serialization) between load and store
///    instructions preceding this instruction and load and store instructions
///    following this instruction, ensuring that the system completes all
///    previous memory accesses before executing subsequent memory accesses.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
///
void _mm_mfence(void);

#if defined(__cplusplus)
} // extern "C"
#endif

/// Converts 16-bit signed integers from both 128-bit integer vector
///    operands into 8-bit signed integers, and packs the results into the
///    destination. Positive values greater than 0x7F are saturated to 0x7F.
///    Negative values less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
///
/// \param __a
///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
///    a signed integer and is converted to an 8-bit signed integer with
///    saturation. Values greater than 0x7F are saturated to 0x7F.
Values less 42650b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 42660b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 42670b57cec5SDimitry Andric /// \param __b 42680b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 42690b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 42700b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 42710b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 42720b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 42730b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 42740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 42750b57cec5SDimitry Andric _mm_packs_epi16(__m128i __a, __m128i __b) 42760b57cec5SDimitry Andric { 42770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 42780b57cec5SDimitry Andric } 42790b57cec5SDimitry Andric 42800b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector 42810b57cec5SDimitry Andric /// operands into 16-bit signed integers, and packs the results into the 42820b57cec5SDimitry Andric /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. 42830b57cec5SDimitry Andric /// Negative values less than 0x8000 are saturated to 0x8000. 42840b57cec5SDimitry Andric /// 42850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42860b57cec5SDimitry Andric /// 42870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 42880b57cec5SDimitry Andric /// 42890b57cec5SDimitry Andric /// \param __a 42900b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. 
Each 32-bit element is treated as 42910b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 42920b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 42930b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 42940b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 42950b57cec5SDimitry Andric /// \param __b 42960b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 42970b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 42980b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 42990b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 43000b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 43010b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 43020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 43030b57cec5SDimitry Andric _mm_packs_epi32(__m128i __a, __m128i __b) 43040b57cec5SDimitry Andric { 43050b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 43060b57cec5SDimitry Andric } 43070b57cec5SDimitry Andric 43080b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 43090b57cec5SDimitry Andric /// operands into 8-bit unsigned integers, and packs the results into the 43100b57cec5SDimitry Andric /// destination. Values greater than 0xFF are saturated to 0xFF. Values less 43110b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. 43120b57cec5SDimitry Andric /// 43130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43140b57cec5SDimitry Andric /// 43150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 
43160b57cec5SDimitry Andric /// 43170b57cec5SDimitry Andric /// \param __a 43180b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 43190b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 43200b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 43210b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 43220b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 43230b57cec5SDimitry Andric /// \param __b 43240b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 43250b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 43260b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 43270b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 43280b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 43290b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 43300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 43310b57cec5SDimitry Andric _mm_packus_epi16(__m128i __a, __m128i __b) 43320b57cec5SDimitry Andric { 43330b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 43340b57cec5SDimitry Andric } 43350b57cec5SDimitry Andric 43360b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 43370b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 43380b57cec5SDimitry Andric /// 43390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43400b57cec5SDimitry Andric /// 43410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction. 
///
/// \param a
///    A 128-bit integer vector.
/// \param imm
///    An immediate value. Bits [2:0] selects values from \a a to be assigned
///    to bits[15:0] of the result. \n
///    000: assign values from bits [15:0] of \a a. \n
///    001: assign values from bits [31:16] of \a a. \n
///    010: assign values from bits [47:32] of \a a. \n
///    011: assign values from bits [63:48] of \a a. \n
///    100: assign values from bits [79:64] of \a a. \n
///    101: assign values from bits [95:80] of \a a. \n
///    110: assign values from bits [111:96] of \a a. \n
///    111: assign values from bits [127:112] of \a a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
/* The cast through unsigned short zero-extends the extracted element; the
 * outer parentheses keep the expansion a single primary expression. */
#define _mm_extract_epi16(a, imm) \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                    (int)(imm)))

/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then inserting the lower 16 bits
///    of an integer parameter into an offset specified by the immediate-value
///    parameter.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a b.
/// \param b
///    An integer. The lower 16 bits of this parameter are written to the
///    result at the element selected by \a imm.
/// \param imm
///    An immediate value selecting which of the eight 16-bit elements of the
///    result receives the lower 16 bits of \a b (element 0 is bits [15:0]).
/// \returns A 128-bit integer vector containing the constructed values.
/* The outer parentheses keep the expansion a single primary expression. */
#define _mm_insert_epi16(a, b, imm) \
  ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
                                        (int)(imm)))

/// Copies the values of the most significant bits from each 8-bit
///    element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
///    value, zero-extends the value, and writes it to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
43930b57cec5SDimitry Andric /// 43940b57cec5SDimitry Andric /// \param __a 43950b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 43960b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 43970b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 43980b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 43990b57cec5SDimitry Andric _mm_movemask_epi8(__m128i __a) 44000b57cec5SDimitry Andric { 44010b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 44020b57cec5SDimitry Andric } 44030b57cec5SDimitry Andric 44040b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 44050b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 44060b57cec5SDimitry Andric /// parameter as a specifier. 44070b57cec5SDimitry Andric /// 44080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44090b57cec5SDimitry Andric /// 44100b57cec5SDimitry Andric /// \code 44110b57cec5SDimitry Andric /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); 44120b57cec5SDimitry Andric /// \endcode 44130b57cec5SDimitry Andric /// 44140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. 44150b57cec5SDimitry Andric /// 44160b57cec5SDimitry Andric /// \param a 44170b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 44180b57cec5SDimitry Andric /// \param imm 44190b57cec5SDimitry Andric /// An immediate value containing an 8-bit value specifying which elements to 44200b57cec5SDimitry Andric /// copy from a. The destinations within the 128-bit destination are assigned 44210b57cec5SDimitry Andric /// values as follows: \n 44220b57cec5SDimitry Andric /// Bits [1:0] are used to assign values to bits [31:0] of the result. 
\n 44230b57cec5SDimitry Andric /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n 44240b57cec5SDimitry Andric /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n 44250b57cec5SDimitry Andric /// Bits [7:6] are used to assign values to bits [127:96] of the result. \n 44260b57cec5SDimitry Andric /// Bit value assignments: \n 44270b57cec5SDimitry Andric /// 00: assign values from bits [31:0] of \a a. \n 44280b57cec5SDimitry Andric /// 01: assign values from bits [63:32] of \a a. \n 44290b57cec5SDimitry Andric /// 10: assign values from bits [95:64] of \a a. \n 44300b57cec5SDimitry Andric /// 11: assign values from bits [127:96] of \a a. 44310b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 44320b57cec5SDimitry Andric #define _mm_shuffle_epi32(a, imm) \ 44330b57cec5SDimitry Andric (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)) 44340b57cec5SDimitry Andric 44350b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four lower 16-bit 44360b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 44370b57cec5SDimitry Andric /// value parameter as a specifier. 44380b57cec5SDimitry Andric /// 44390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44400b57cec5SDimitry Andric /// 44410b57cec5SDimitry Andric /// \code 44420b57cec5SDimitry Andric /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); 44430b57cec5SDimitry Andric /// \endcode 44440b57cec5SDimitry Andric /// 44450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. 44460b57cec5SDimitry Andric /// 44470b57cec5SDimitry Andric /// \param a 44480b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits 44490b57cec5SDimitry Andric /// [127:64] of the result. 
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [15:0] of \a a. \n
///    01: assign values from bits [31:16] of \a a. \n
///    10: assign values from bits [47:32] of \a a. \n
///    11: assign values from bits [63:48] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
/* The outer parentheses keep the expansion a single primary expression. */
#define _mm_shufflelo_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
///    [63:0] of the result.
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [79:64] of \a a. \n
///    01: assign values from bits [95:80] of \a a. \n
///    10: assign values from bits [111:96] of \a a. \n
///    11: assign values from bits [127:112] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
/* The outer parentheses keep the expansion a single primary expression. */
#define _mm_shufflehi_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))

/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
///   instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [7:0] of the result. \n
///    Bits [79:72] are written to bits [23:16] of the result. \n
///    Bits [87:80] are written to bits [39:32] of the result. \n
///    Bits [95:88] are written to bits [55:48] of the result. \n
///    Bits [103:96] are written to bits [71:64] of the result. \n
///    Bits [111:104] are written to bits [87:80] of the result. \n
///    Bits [119:112] are written to bits [103:96] of the result. \n
///    Bits [127:120] are written to bits [119:112] of the result.
/// \param __b
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [15:8] of the result. \n
///    Bits [79:72] are written to bits [31:24] of the result. \n
///    Bits [87:80] are written to bits [47:40] of the result. \n
///    Bits [95:88] are written to bits [63:56] of the result. \n
///    Bits [103:96] are written to bits [79:72] of the result. \n
///    Bits [111:104] are written to bits [95:88] of the result. \n
///    Bits [119:112] are written to bits [111:104] of the result. \n
///    Bits [127:120] are written to bits [127:120] of the result.
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
45240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45250b57cec5SDimitry Andric _mm_unpackhi_epi8(__m128i __a, __m128i __b) 45260b57cec5SDimitry Andric { 45270b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 45280b57cec5SDimitry Andric } 45290b57cec5SDimitry Andric 45300b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 45310b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 45320b57cec5SDimitry Andric /// 45330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45340b57cec5SDimitry Andric /// 45350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 45360b57cec5SDimitry Andric /// instruction. 45370b57cec5SDimitry Andric /// 45380b57cec5SDimitry Andric /// \param __a 45390b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45400b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 45410b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 45420b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 45430b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 45440b57cec5SDimitry Andric /// \param __b 45450b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45460b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 45470b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 45480b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 45490b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 45500b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 
45510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45520b57cec5SDimitry Andric _mm_unpackhi_epi16(__m128i __a, __m128i __b) 45530b57cec5SDimitry Andric { 45540b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); 45550b57cec5SDimitry Andric } 45560b57cec5SDimitry Andric 45570b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 45580b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 45590b57cec5SDimitry Andric /// 45600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45610b57cec5SDimitry Andric /// 45620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 45630b57cec5SDimitry Andric /// instruction. 45640b57cec5SDimitry Andric /// 45650b57cec5SDimitry Andric /// \param __a 45660b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45670b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 45680b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 45690b57cec5SDimitry Andric /// \param __b 45700b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45710b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 45720b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 45730b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
45740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45750b57cec5SDimitry Andric _mm_unpackhi_epi32(__m128i __a, __m128i __b) 45760b57cec5SDimitry Andric { 45770b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); 45780b57cec5SDimitry Andric } 45790b57cec5SDimitry Andric 45800b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 45810b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 45820b57cec5SDimitry Andric /// 45830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45840b57cec5SDimitry Andric /// 45850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 45860b57cec5SDimitry Andric /// instruction. 45870b57cec5SDimitry Andric /// 45880b57cec5SDimitry Andric /// \param __a 45890b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45900b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 45910b57cec5SDimitry Andric /// \param __b 45920b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45930b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 45940b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 45950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45960b57cec5SDimitry Andric _mm_unpackhi_epi64(__m128i __a, __m128i __b) 45970b57cec5SDimitry Andric { 45980b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1); 45990b57cec5SDimitry Andric } 46000b57cec5SDimitry Andric 46010b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 46020b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
///   instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8]. \n
///    Bits [7:0] are written to bits [7:0] of the result. \n
///    Bits [15:8] are written to bits [23:16] of the result. \n
///    Bits [23:16] are written to bits [39:32] of the result. \n
///    Bits [31:24] are written to bits [55:48] of the result. \n
///    Bits [39:32] are written to bits [71:64] of the result. \n
///    Bits [47:40] are written to bits [87:80] of the result. \n
///    Bits [55:48] are written to bits [103:96] of the result. \n
///    Bits [63:56] are written to bits [119:112] of the result.
/// \param __b
///    A 128-bit vector of [16 x i8]. \n
///    Bits [7:0] are written to bits [15:8] of the result. \n
///    Bits [15:8] are written to bits [31:24] of the result. \n
///    Bits [23:16] are written to bits [47:40] of the result. \n
///    Bits [31:24] are written to bits [63:56] of the result. \n
///    Bits [39:32] are written to bits [79:72] of the result. \n
///    Bits [47:40] are written to bits [95:88] of the result. \n
///    Bits [55:48] are written to bits [111:104] of the result. \n
///    Bits [63:56] are written to bits [127:120] of the result.
46290b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 46300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46310b57cec5SDimitry Andric _mm_unpacklo_epi8(__m128i __a, __m128i __b) 46320b57cec5SDimitry Andric { 46330b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); 46340b57cec5SDimitry Andric } 46350b57cec5SDimitry Andric 46360b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 46370b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 46380b57cec5SDimitry Andric /// [8 x i16]. 46390b57cec5SDimitry Andric /// 46400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46410b57cec5SDimitry Andric /// 46420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 46430b57cec5SDimitry Andric /// instruction. 46440b57cec5SDimitry Andric /// 46450b57cec5SDimitry Andric /// \param __a 46460b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 46470b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 46480b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 46490b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 46500b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 46510b57cec5SDimitry Andric /// \param __b 46520b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 46530b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 46540b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 46550b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. \n 46560b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 
46570b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 46580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46590b57cec5SDimitry Andric _mm_unpacklo_epi16(__m128i __a, __m128i __b) 46600b57cec5SDimitry Andric { 46610b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); 46620b57cec5SDimitry Andric } 46630b57cec5SDimitry Andric 46640b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 46650b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 46660b57cec5SDimitry Andric /// 46670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46680b57cec5SDimitry Andric /// 46690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 46700b57cec5SDimitry Andric /// instruction. 46710b57cec5SDimitry Andric /// 46720b57cec5SDimitry Andric /// \param __a 46730b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 46740b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 46750b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 46760b57cec5SDimitry Andric /// \param __b 46770b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 46780b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 46790b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 46800b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
46810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46820b57cec5SDimitry Andric _mm_unpacklo_epi32(__m128i __a, __m128i __b) 46830b57cec5SDimitry Andric { 46840b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); 46850b57cec5SDimitry Andric } 46860b57cec5SDimitry Andric 46870b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 46880b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 46890b57cec5SDimitry Andric /// 46900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46910b57cec5SDimitry Andric /// 46920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 46930b57cec5SDimitry Andric /// instruction. 46940b57cec5SDimitry Andric /// 46950b57cec5SDimitry Andric /// \param __a 46960b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 46970b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 46980b57cec5SDimitry Andric /// \param __b 46990b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 47000b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 47010b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 47020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47030b57cec5SDimitry Andric _mm_unpacklo_epi64(__m128i __a, __m128i __b) 47040b57cec5SDimitry Andric { 47050b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0); 47060b57cec5SDimitry Andric } 47070b57cec5SDimitry Andric 47080b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 47090b57cec5SDimitry Andric /// integer. 
47100b57cec5SDimitry Andric /// 47110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47120b57cec5SDimitry Andric /// 47130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 47140b57cec5SDimitry Andric /// 47150b57cec5SDimitry Andric /// \param __a 47160b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 47170b57cec5SDimitry Andric /// destination. 47180b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 47190b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 47200b57cec5SDimitry Andric _mm_movepi64_pi64(__m128i __a) 47210b57cec5SDimitry Andric { 47220b57cec5SDimitry Andric return (__m64)__a[0]; 47230b57cec5SDimitry Andric } 47240b57cec5SDimitry Andric 47250b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 47260b57cec5SDimitry Andric /// upper bits. 47270b57cec5SDimitry Andric /// 47280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47290b57cec5SDimitry Andric /// 47300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 47310b57cec5SDimitry Andric /// 47320b57cec5SDimitry Andric /// \param __a 47330b57cec5SDimitry Andric /// A 64-bit value. 47340b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 47350b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 47360b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47370b57cec5SDimitry Andric _mm_movpi64_epi64(__m64 __a) 47380b57cec5SDimitry Andric { 47390b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ (long long)__a, 0 }; 47400b57cec5SDimitry Andric } 47410b57cec5SDimitry Andric 47420b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 47430b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 
47440b57cec5SDimitry Andric /// 47450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47460b57cec5SDimitry Andric /// 47470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 47480b57cec5SDimitry Andric /// 47490b57cec5SDimitry Andric /// \param __a 47500b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 47510b57cec5SDimitry Andric /// destination. 47520b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 47530b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 47540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47550b57cec5SDimitry Andric _mm_move_epi64(__m128i __a) 47560b57cec5SDimitry Andric { 47570b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 47580b57cec5SDimitry Andric } 47590b57cec5SDimitry Andric 47600b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 47610b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 47620b57cec5SDimitry Andric /// double]. 47630b57cec5SDimitry Andric /// 47640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47650b57cec5SDimitry Andric /// 47660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 47670b57cec5SDimitry Andric /// 47680b57cec5SDimitry Andric /// \param __a 47690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47700b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 47710b57cec5SDimitry Andric /// \param __b 47720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47730b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 47740b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
47750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 47760b57cec5SDimitry Andric _mm_unpackhi_pd(__m128d __a, __m128d __b) 47770b57cec5SDimitry Andric { 47780b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1); 47790b57cec5SDimitry Andric } 47800b57cec5SDimitry Andric 47810b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 47820b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 47830b57cec5SDimitry Andric /// double]. 47840b57cec5SDimitry Andric /// 47850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47860b57cec5SDimitry Andric /// 47870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 47880b57cec5SDimitry Andric /// 47890b57cec5SDimitry Andric /// \param __a 47900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47910b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 47920b57cec5SDimitry Andric /// \param __b 47930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47940b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 47950b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 47960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 47970b57cec5SDimitry Andric _mm_unpacklo_pd(__m128d __a, __m128d __b) 47980b57cec5SDimitry Andric { 47990b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0); 48000b57cec5SDimitry Andric } 48010b57cec5SDimitry Andric 48020b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 48030b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 48040b57cec5SDimitry Andric /// low-order bits of the destination. 
48050b57cec5SDimitry Andric /// 48060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48070b57cec5SDimitry Andric /// 48080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 48090b57cec5SDimitry Andric /// 48100b57cec5SDimitry Andric /// \param __a 48110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 48120b57cec5SDimitry Andric /// be extracted. 48130b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 48140b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 48150b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 48160b57cec5SDimitry Andric _mm_movemask_pd(__m128d __a) 48170b57cec5SDimitry Andric { 48180b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 48190b57cec5SDimitry Andric } 48200b57cec5SDimitry Andric 48210b57cec5SDimitry Andric 48220b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 48230b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 48240b57cec5SDimitry Andric /// parameter as a specifier. 48250b57cec5SDimitry Andric /// 48260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48270b57cec5SDimitry Andric /// 48280b57cec5SDimitry Andric /// \code 48290b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 48300b57cec5SDimitry Andric /// \endcode 48310b57cec5SDimitry Andric /// 48320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 48330b57cec5SDimitry Andric /// 48340b57cec5SDimitry Andric /// \param a 48350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 48360b57cec5SDimitry Andric /// \param b 48370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
/// \param i
///    An 8-bit immediate value. The least significant two bits specify which
///    elements to copy from \a a and \a b: \n
///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
///    Bit[1] = 1: upper element of \a b copied to upper element of result.
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
/* The whole expansion is parenthesized so that a postfix operator applied to
 * the macro (for example a vector subscript) binds to the cast result rather
 * than to the builtin call inside it. */
#define _mm_shuffle_pd(a, b, i) \
  ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
                                  (int)(i)))

/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
///    floating-point vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit floating-point vector of [2 x double].
/// \returns A 128-bit floating-point vector of [4 x float] containing the same
///    bitwise pattern as the parameter.
48610b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 48620b57cec5SDimitry Andric _mm_castpd_ps(__m128d __a) 48630b57cec5SDimitry Andric { 48640b57cec5SDimitry Andric return (__m128)__a; 48650b57cec5SDimitry Andric } 48660b57cec5SDimitry Andric 48670b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 48680b57cec5SDimitry Andric /// integer vector. 48690b57cec5SDimitry Andric /// 48700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48710b57cec5SDimitry Andric /// 48720b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 48730b57cec5SDimitry Andric /// 48740b57cec5SDimitry Andric /// \param __a 48750b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 48760b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 48770b57cec5SDimitry Andric /// parameter. 48780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 48790b57cec5SDimitry Andric _mm_castpd_si128(__m128d __a) 48800b57cec5SDimitry Andric { 48810b57cec5SDimitry Andric return (__m128i)__a; 48820b57cec5SDimitry Andric } 48830b57cec5SDimitry Andric 48840b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 48850b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 48860b57cec5SDimitry Andric /// 48870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48880b57cec5SDimitry Andric /// 48890b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 48900b57cec5SDimitry Andric /// 48910b57cec5SDimitry Andric /// \param __a 48920b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 48930b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 48940b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
48950b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 48960b57cec5SDimitry Andric _mm_castps_pd(__m128 __a) 48970b57cec5SDimitry Andric { 48980b57cec5SDimitry Andric return (__m128d)__a; 48990b57cec5SDimitry Andric } 49000b57cec5SDimitry Andric 49010b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 49020b57cec5SDimitry Andric /// integer vector. 49030b57cec5SDimitry Andric /// 49040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49050b57cec5SDimitry Andric /// 49060b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49070b57cec5SDimitry Andric /// 49080b57cec5SDimitry Andric /// \param __a 49090b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 49100b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 49110b57cec5SDimitry Andric /// parameter. 49120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 49130b57cec5SDimitry Andric _mm_castps_si128(__m128 __a) 49140b57cec5SDimitry Andric { 49150b57cec5SDimitry Andric return (__m128i)__a; 49160b57cec5SDimitry Andric } 49170b57cec5SDimitry Andric 49180b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 49190b57cec5SDimitry Andric /// of [4 x float]. 49200b57cec5SDimitry Andric /// 49210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49220b57cec5SDimitry Andric /// 49230b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49240b57cec5SDimitry Andric /// 49250b57cec5SDimitry Andric /// \param __a 49260b57cec5SDimitry Andric /// A 128-bit integer vector. 49270b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 49280b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
49290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 49300b57cec5SDimitry Andric _mm_castsi128_ps(__m128i __a) 49310b57cec5SDimitry Andric { 49320b57cec5SDimitry Andric return (__m128)__a; 49330b57cec5SDimitry Andric } 49340b57cec5SDimitry Andric 49350b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 49360b57cec5SDimitry Andric /// of [2 x double]. 49370b57cec5SDimitry Andric /// 49380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49390b57cec5SDimitry Andric /// 49400b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49410b57cec5SDimitry Andric /// 49420b57cec5SDimitry Andric /// \param __a 49430b57cec5SDimitry Andric /// A 128-bit integer vector. 49440b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 49450b57cec5SDimitry Andric /// bitwise pattern as the parameter. 49460b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 49470b57cec5SDimitry Andric _mm_castsi128_pd(__m128i __a) 49480b57cec5SDimitry Andric { 49490b57cec5SDimitry Andric return (__m128d)__a; 49500b57cec5SDimitry Andric } 49510b57cec5SDimitry Andric 49520b57cec5SDimitry Andric #if defined(__cplusplus) 49530b57cec5SDimitry Andric extern "C" { 49540b57cec5SDimitry Andric #endif 49550b57cec5SDimitry Andric 49560b57cec5SDimitry Andric /// Indicates that a spin loop is being executed for the purposes of 49570b57cec5SDimitry Andric /// optimizing power consumption during the loop. 49580b57cec5SDimitry Andric /// 49590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49600b57cec5SDimitry Andric /// 49610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAUSE </c> instruction. 
///
void _mm_pause(void);

#if defined(__cplusplus)
} // extern "C"
#endif
/* The function-attribute helper macros are private to this header; drop them
 * so they do not leak into code that includes it. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX

/* Packs two selector values into one integer: x lands in bit 1 and y in
 * bit 0. NOTE(review): presumably meant to build the immediate argument of
 * _mm_shuffle_pd -- confirm against callers. */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))

/* Values for the denormals-are-zero setting, which occupies bit 6 (0x0040)
 * of the control/status word read by _mm_getcsr(). */
#define _MM_DENORMALS_ZERO_ON (0x0040U)
#define _MM_DENORMALS_ZERO_OFF (0x0000U)

/* Mask that isolates the denormals-are-zero bit. */
#define _MM_DENORMALS_ZERO_MASK (0x0040U)

/* Read the current setting: yields _MM_DENORMALS_ZERO_ON or
 * _MM_DENORMALS_ZERO_OFF. */
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
/* Write a new setting: clears the masked bit, ORs in x, and stores the result
 * with _mm_setcsr(). x is not masked, so any other bits set in x are written
 * through as well. */
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))

#endif /* __EMMINTRIN_H */