10b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 110b57cec5SDimitry Andric #define __EMMINTRIN_H 120b57cec5SDimitry Andric 13*349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14*349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15*349cc55cSDimitry Andric #endif 16*349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <xmmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 200b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 230b57cec5SDimitry Andric typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric /* Type defines. 
*/ 260b57cec5SDimitry Andric typedef double __v2df __attribute__ ((__vector_size__ (16))); 270b57cec5SDimitry Andric typedef long long __v2di __attribute__ ((__vector_size__ (16))); 280b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 290b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric /* Unsigned types */ 320b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); 330b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 340b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 370b57cec5SDimitry Andric * appear in the interface though. */ 380b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 410b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) 420b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the 450b57cec5SDimitry Andric /// sum in the lower 64 bits of the result. The upper 64 bits of the result 460b57cec5SDimitry Andric /// are copied from the upper double-precision value of the first operand. 470b57cec5SDimitry Andric /// 480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 490b57cec5SDimitry Andric /// 500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. 
510b57cec5SDimitry Andric /// 520b57cec5SDimitry Andric /// \param __a 530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 540b57cec5SDimitry Andric /// \param __b 550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 560b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 570b57cec5SDimitry Andric /// sum of the lower 64 bits of both operands. The upper 64 bits are copied 580b57cec5SDimitry Andric /// from the upper 64 bits of the first source operand. 590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 600b57cec5SDimitry Andric _mm_add_sd(__m128d __a, __m128d __b) 610b57cec5SDimitry Andric { 620b57cec5SDimitry Andric __a[0] += __b[0]; 630b57cec5SDimitry Andric return __a; 640b57cec5SDimitry Andric } 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double]. 670b57cec5SDimitry Andric /// 680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 690b57cec5SDimitry Andric /// 700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 710b57cec5SDimitry Andric /// 720b57cec5SDimitry Andric /// \param __a 730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 740b57cec5SDimitry Andric /// \param __b 750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 760b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 770b57cec5SDimitry Andric /// operands. 
780b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 790b57cec5SDimitry Andric _mm_add_pd(__m128d __a, __m128d __b) 800b57cec5SDimitry Andric { 810b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 850b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 860b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 870b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 880b57cec5SDimitry Andric /// operand. 890b57cec5SDimitry Andric /// 900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 910b57cec5SDimitry Andric /// 920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 930b57cec5SDimitry Andric /// 940b57cec5SDimitry Andric /// \param __a 950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 960b57cec5SDimitry Andric /// \param __b 970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 990b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 1000b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 1010b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1020b57cec5SDimitry Andric _mm_sub_sd(__m128d __a, __m128d __b) 1030b57cec5SDimitry Andric { 1040b57cec5SDimitry Andric __a[0] -= __b[0]; 1050b57cec5SDimitry Andric return __a; 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 
1090b57cec5SDimitry Andric /// 1100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1110b57cec5SDimitry Andric /// 1120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 1130b57cec5SDimitry Andric /// 1140b57cec5SDimitry Andric /// \param __a 1150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1160b57cec5SDimitry Andric /// \param __b 1170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1180b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 1190b57cec5SDimitry Andric /// both operands. 1200b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1210b57cec5SDimitry Andric _mm_sub_pd(__m128d __a, __m128d __b) 1220b57cec5SDimitry Andric { 1230b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 1270b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 1280b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1290b57cec5SDimitry Andric /// operand. 1300b57cec5SDimitry Andric /// 1310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1320b57cec5SDimitry Andric /// 1330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 1340b57cec5SDimitry Andric /// 1350b57cec5SDimitry Andric /// \param __a 1360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1370b57cec5SDimitry Andric /// \param __b 1380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
1390b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1400b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 1410b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 1420b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1430b57cec5SDimitry Andric _mm_mul_sd(__m128d __a, __m128d __b) 1440b57cec5SDimitry Andric { 1450b57cec5SDimitry Andric __a[0] *= __b[0]; 1460b57cec5SDimitry Andric return __a; 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 1500b57cec5SDimitry Andric /// 1510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1520b57cec5SDimitry Andric /// 1530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 1540b57cec5SDimitry Andric /// 1550b57cec5SDimitry Andric /// \param __a 1560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1570b57cec5SDimitry Andric /// \param __b 1580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1590b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 1600b57cec5SDimitry Andric /// operands. 1610b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1620b57cec5SDimitry Andric _mm_mul_pd(__m128d __a, __m128d __b) 1630b57cec5SDimitry Andric { 1640b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 1680b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 1690b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. 
The upper 64 bits of the 1700b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1710b57cec5SDimitry Andric /// operand. 1720b57cec5SDimitry Andric /// 1730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1740b57cec5SDimitry Andric /// 1750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 1760b57cec5SDimitry Andric /// 1770b57cec5SDimitry Andric /// \param __a 1780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1790b57cec5SDimitry Andric /// \param __b 1800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 1810b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1820b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 1830b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 1840b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1850b57cec5SDimitry Andric _mm_div_sd(__m128d __a, __m128d __b) 1860b57cec5SDimitry Andric { 1870b57cec5SDimitry Andric __a[0] /= __b[0]; 1880b57cec5SDimitry Andric return __a; 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 1920b57cec5SDimitry Andric /// [2 x double]. 1930b57cec5SDimitry Andric /// 1940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1950b57cec5SDimitry Andric /// 1960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 1970b57cec5SDimitry Andric /// 1980b57cec5SDimitry Andric /// \param __a 1990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 2000b57cec5SDimitry Andric /// \param __b 2010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 
2020b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 2030b57cec5SDimitry Andric /// operands. 2040b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2050b57cec5SDimitry Andric _mm_div_pd(__m128d __a, __m128d __b) 2060b57cec5SDimitry Andric { 2070b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 2110b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 2120b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 2130b57cec5SDimitry Andric /// double-precision value of the first operand. 2140b57cec5SDimitry Andric /// 2150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2160b57cec5SDimitry Andric /// 2170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 2180b57cec5SDimitry Andric /// 2190b57cec5SDimitry Andric /// \param __a 2200b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2210b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 2220b57cec5SDimitry Andric /// result. 2230b57cec5SDimitry Andric /// \param __b 2240b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2250b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 2260b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2270b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 2280b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 
2290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2300b57cec5SDimitry Andric _mm_sqrt_sd(__m128d __a, __m128d __b) 2310b57cec5SDimitry Andric { 2320b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 2330b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 2370b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2380b57cec5SDimitry Andric /// 2390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2400b57cec5SDimitry Andric /// 2410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 2420b57cec5SDimitry Andric /// 2430b57cec5SDimitry Andric /// \param __a 2440b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 2450b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 2460b57cec5SDimitry Andric /// values in the operand. 2470b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2480b57cec5SDimitry Andric _mm_sqrt_pd(__m128d __a) 2490b57cec5SDimitry Andric { 2500b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2540b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 2550b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 2560b57cec5SDimitry Andric /// double-precision value of the first operand. 2570b57cec5SDimitry Andric /// 2580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2590b57cec5SDimitry Andric /// 2600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 
2610b57cec5SDimitry Andric /// 2620b57cec5SDimitry Andric /// \param __a 2630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2640b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2650b57cec5SDimitry Andric /// \param __b 2660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2670b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2680b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2690b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 2700b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 2710b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2720b57cec5SDimitry Andric _mm_min_sd(__m128d __a, __m128d __b) 2730b57cec5SDimitry Andric { 2740b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 2780b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the lesser of each pair of 2790b57cec5SDimitry Andric /// values. 2800b57cec5SDimitry Andric /// 2810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2820b57cec5SDimitry Andric /// 2830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 2840b57cec5SDimitry Andric /// 2850b57cec5SDimitry Andric /// \param __a 2860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2870b57cec5SDimitry Andric /// \param __b 2880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 
2890b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 2900b57cec5SDimitry Andric /// between both operands. 2910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2920b57cec5SDimitry Andric _mm_min_pd(__m128d __a, __m128d __b) 2930b57cec5SDimitry Andric { 2940b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2980b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 2990b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 3000b57cec5SDimitry Andric /// double-precision value of the first operand. 3010b57cec5SDimitry Andric /// 3020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3030b57cec5SDimitry Andric /// 3040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 3050b57cec5SDimitry Andric /// 3060b57cec5SDimitry Andric /// \param __a 3070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3080b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3090b57cec5SDimitry Andric /// \param __b 3100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3110b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3120b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3130b57cec5SDimitry Andric /// maximum value between both operands. The upper 64 bits are copied from 3140b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 
3150b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3160b57cec5SDimitry Andric _mm_max_sd(__m128d __a, __m128d __b) 3170b57cec5SDimitry Andric { 3180b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 3220b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the greater of each pair 3230b57cec5SDimitry Andric /// of values. 3240b57cec5SDimitry Andric /// 3250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3260b57cec5SDimitry Andric /// 3270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 3280b57cec5SDimitry Andric /// 3290b57cec5SDimitry Andric /// \param __a 3300b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3310b57cec5SDimitry Andric /// \param __b 3320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3330b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 3340b57cec5SDimitry Andric /// between both operands. 3350b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3360b57cec5SDimitry Andric _mm_max_pd(__m128d __a, __m128d __b) 3370b57cec5SDimitry Andric { 3380b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 3390b57cec5SDimitry Andric } 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 3420b57cec5SDimitry Andric /// 3430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3440b57cec5SDimitry Andric /// 3450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
3460b57cec5SDimitry Andric /// 3470b57cec5SDimitry Andric /// \param __a 3480b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3490b57cec5SDimitry Andric /// \param __b 3500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3510b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3520b57cec5SDimitry Andric /// values between both operands. 3530b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3540b57cec5SDimitry Andric _mm_and_pd(__m128d __a, __m128d __b) 3550b57cec5SDimitry Andric { 3560b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 3600b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 3610b57cec5SDimitry Andric /// 3620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3630b57cec5SDimitry Andric /// 3640b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 3650b57cec5SDimitry Andric /// 3660b57cec5SDimitry Andric /// \param __a 3670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. The 3680b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 3690b57cec5SDimitry Andric /// \param __b 3700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 3710b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3720b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 3730b57cec5SDimitry Andric /// operand. 
3740b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3750b57cec5SDimitry Andric _mm_andnot_pd(__m128d __a, __m128d __b) 3760b57cec5SDimitry Andric { 3770b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 3810b57cec5SDimitry Andric /// 3820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3830b57cec5SDimitry Andric /// 3840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 3850b57cec5SDimitry Andric /// 3860b57cec5SDimitry Andric /// \param __a 3870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3880b57cec5SDimitry Andric /// \param __b 3890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 3910b57cec5SDimitry Andric /// values between both operands. 3920b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3930b57cec5SDimitry Andric _mm_or_pd(__m128d __a, __m128d __b) 3940b57cec5SDimitry Andric { 3950b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 3990b57cec5SDimitry Andric /// 4000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4010b57cec5SDimitry Andric /// 4020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 4030b57cec5SDimitry Andric /// 4040b57cec5SDimitry Andric /// \param __a 4050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
4060b57cec5SDimitry Andric /// \param __b 4070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 4080b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 4090b57cec5SDimitry Andric /// values between both operands. 4100b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4110b57cec5SDimitry Andric _mm_xor_pd(__m128d __a, __m128d __b) 4120b57cec5SDimitry Andric { 4130b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4170b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 4180b57cec5SDimitry Andric /// for false, 0xFFFFFFFFFFFFFFFF for true. 4190b57cec5SDimitry Andric /// 4200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4210b57cec5SDimitry Andric /// 4220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 4230b57cec5SDimitry Andric /// 4240b57cec5SDimitry Andric /// \param __a 4250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4260b57cec5SDimitry Andric /// \param __b 4270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4280b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
4290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4300b57cec5SDimitry Andric _mm_cmpeq_pd(__m128d __a, __m128d __b) 4310b57cec5SDimitry Andric { 4320b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4360b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4370b57cec5SDimitry Andric /// operand are less than those in the second operand. Each comparison 4380b57cec5SDimitry Andric /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4390b57cec5SDimitry Andric /// 4400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4410b57cec5SDimitry Andric /// 4420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4430b57cec5SDimitry Andric /// 4440b57cec5SDimitry Andric /// \param __a 4450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4460b57cec5SDimitry Andric /// \param __b 4470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4480b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 4490b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4500b57cec5SDimitry Andric _mm_cmplt_pd(__m128d __a, __m128d __b) 4510b57cec5SDimitry Andric { 4520b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 4530b57cec5SDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4560b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4570b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 4580b57cec5SDimitry Andric /// 4590b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4600b57cec5SDimitry Andric /// 4610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4620b57cec5SDimitry Andric /// 4630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4640b57cec5SDimitry Andric /// 4650b57cec5SDimitry Andric /// \param __a 4660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4670b57cec5SDimitry Andric /// \param __b 4680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4690b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 4700b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4710b57cec5SDimitry Andric _mm_cmple_pd(__m128d __a, __m128d __b) 4720b57cec5SDimitry Andric { 4730b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4770b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4780b57cec5SDimitry Andric /// operand are greater than those in the second operand. 4790b57cec5SDimitry Andric /// 4800b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4810b57cec5SDimitry Andric /// 4820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4830b57cec5SDimitry Andric /// 4840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4850b57cec5SDimitry Andric /// 4860b57cec5SDimitry Andric /// \param __a 4870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4880b57cec5SDimitry Andric /// \param __b 4890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4900b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
4910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4920b57cec5SDimitry Andric _mm_cmpgt_pd(__m128d __a, __m128d __b) 4930b57cec5SDimitry Andric { 4940b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4980b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4990b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 5000b57cec5SDimitry Andric /// 5010b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5020b57cec5SDimitry Andric /// 5030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5040b57cec5SDimitry Andric /// 5050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 5060b57cec5SDimitry Andric /// 5070b57cec5SDimitry Andric /// \param __a 5080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5090b57cec5SDimitry Andric /// \param __b 5100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5110b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5130b57cec5SDimitry Andric _mm_cmpge_pd(__m128d __a, __m128d __b) 5140b57cec5SDimitry Andric { 5150b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 5160b57cec5SDimitry Andric } 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5190b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5200b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 
5210b57cec5SDimitry Andric /// 5220b57cec5SDimitry Andric /// A pair of double-precision values are "ordered" with respect to each 5230b57cec5SDimitry Andric /// other if neither value is a NaN. Each comparison yields 0x0 for false, 5240b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 5250b57cec5SDimitry Andric /// 5260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5270b57cec5SDimitry Andric /// 5280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 5290b57cec5SDimitry Andric /// 5300b57cec5SDimitry Andric /// \param __a 5310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5320b57cec5SDimitry Andric /// \param __b 5330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5340b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5350b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5360b57cec5SDimitry Andric _mm_cmpord_pd(__m128d __a, __m128d __b) 5370b57cec5SDimitry Andric { 5380b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5420b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5430b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 5440b57cec5SDimitry Andric /// 5450b57cec5SDimitry Andric /// A pair of double-precision values are "unordered" with respect to each 5460b57cec5SDimitry Andric /// other if one or both values are NaN. Each comparison yields 0x0 for 5470b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 
5480b57cec5SDimitry Andric /// 5490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5500b57cec5SDimitry Andric /// 5510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 5520b57cec5SDimitry Andric /// instruction. 5530b57cec5SDimitry Andric /// 5540b57cec5SDimitry Andric /// \param __a 5550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5560b57cec5SDimitry Andric /// \param __b 5570b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5580b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 5590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5600b57cec5SDimitry Andric _mm_cmpunord_pd(__m128d __a, __m128d __b) 5610b57cec5SDimitry Andric { 5620b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5660b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5670b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 5680b57cec5SDimitry Andric /// 5690b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5700b57cec5SDimitry Andric /// 5710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5720b57cec5SDimitry Andric /// 5730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 5740b57cec5SDimitry Andric /// 5750b57cec5SDimitry Andric /// \param __a 5760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5770b57cec5SDimitry Andric /// \param __b 5780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5790b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
5800b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 5810b57cec5SDimitry Andric _mm_cmpneq_pd(__m128d __a, __m128d __b) 5820b57cec5SDimitry Andric { 5830b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5870b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5880b57cec5SDimitry Andric /// operand are not less than those in the second operand. 5890b57cec5SDimitry Andric /// 5900b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5910b57cec5SDimitry Andric /// 5920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5930b57cec5SDimitry Andric /// 5940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 5950b57cec5SDimitry Andric /// 5960b57cec5SDimitry Andric /// \param __a 5970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5980b57cec5SDimitry Andric /// \param __b 5990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6000b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 6010b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6020b57cec5SDimitry Andric _mm_cmpnlt_pd(__m128d __a, __m128d __b) 6030b57cec5SDimitry Andric { 6040b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 6050b57cec5SDimitry Andric } 6060b57cec5SDimitry Andric 6070b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6080b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6090b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 
6100b57cec5SDimitry Andric /// 6110b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6120b57cec5SDimitry Andric /// 6130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6140b57cec5SDimitry Andric /// 6150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6160b57cec5SDimitry Andric /// 6170b57cec5SDimitry Andric /// \param __a 6180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6190b57cec5SDimitry Andric /// \param __b 6200b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6210b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 6220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6230b57cec5SDimitry Andric _mm_cmpnle_pd(__m128d __a, __m128d __b) 6240b57cec5SDimitry Andric { 6250b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 6260b57cec5SDimitry Andric } 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6290b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6300b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 6310b57cec5SDimitry Andric /// 6320b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6330b57cec5SDimitry Andric /// 6340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6350b57cec5SDimitry Andric /// 6360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6370b57cec5SDimitry Andric /// 6380b57cec5SDimitry Andric /// \param __a 6390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6400b57cec5SDimitry Andric /// \param __b 6410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6420b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
6430b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6440b57cec5SDimitry Andric _mm_cmpngt_pd(__m128d __a, __m128d __b) 6450b57cec5SDimitry Andric { 6460b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 6470b57cec5SDimitry Andric } 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6500b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6510b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 6520b57cec5SDimitry Andric /// 6530b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6540b57cec5SDimitry Andric /// 6550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6560b57cec5SDimitry Andric /// 6570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6580b57cec5SDimitry Andric /// 6590b57cec5SDimitry Andric /// \param __a 6600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6610b57cec5SDimitry Andric /// \param __b 6620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6630b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 6640b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6650b57cec5SDimitry Andric _mm_cmpnge_pd(__m128d __a, __m128d __b) 6660b57cec5SDimitry Andric { 6670b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6710b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 6720b57cec5SDimitry Andric /// 6730b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
6740b57cec5SDimitry Andric /// 6750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6760b57cec5SDimitry Andric /// 6770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 6780b57cec5SDimitry Andric /// 6790b57cec5SDimitry Andric /// \param __a 6800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6810b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6820b57cec5SDimitry Andric /// \param __b 6830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6840b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6850b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 6860b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 6870b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 6880b57cec5SDimitry Andric _mm_cmpeq_sd(__m128d __a, __m128d __b) 6890b57cec5SDimitry Andric { 6900b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 6910b57cec5SDimitry Andric } 6920b57cec5SDimitry Andric 6930b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6940b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6950b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 6960b57cec5SDimitry Andric /// the second parameter. 6970b57cec5SDimitry Andric /// 6980b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6990b57cec5SDimitry Andric /// 7000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7010b57cec5SDimitry Andric /// 7020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 
7030b57cec5SDimitry Andric /// 7040b57cec5SDimitry Andric /// \param __a 7050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7060b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7070b57cec5SDimitry Andric /// \param __b 7080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7100b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7110b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7130b57cec5SDimitry Andric _mm_cmplt_sd(__m128d __a, __m128d __b) 7140b57cec5SDimitry Andric { 7150b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7190b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7200b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 7210b57cec5SDimitry Andric /// corresponding value in the second parameter. 7220b57cec5SDimitry Andric /// 7230b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7240b57cec5SDimitry Andric /// 7250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7260b57cec5SDimitry Andric /// 7270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7280b57cec5SDimitry Andric /// 7290b57cec5SDimitry Andric /// \param __a 7300b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7310b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 
7320b57cec5SDimitry Andric /// \param __b 7330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7340b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7350b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7360b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7370b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7380b57cec5SDimitry Andric _mm_cmple_sd(__m128d __a, __m128d __b) 7390b57cec5SDimitry Andric { 7400b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 7410b57cec5SDimitry Andric } 7420b57cec5SDimitry Andric 7430b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7440b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7450b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 7460b57cec5SDimitry Andric /// in the second parameter. 7470b57cec5SDimitry Andric /// 7480b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7490b57cec5SDimitry Andric /// 7500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7510b57cec5SDimitry Andric /// 7520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7530b57cec5SDimitry Andric /// 7540b57cec5SDimitry Andric /// \param __a 7550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7560b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7570b57cec5SDimitry Andric /// \param __b 7580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7590b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 
7600b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7610b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 7620b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7630b57cec5SDimitry Andric _mm_cmpgt_sd(__m128d __a, __m128d __b) 7640b57cec5SDimitry Andric { 7650b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 7660b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7700b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7710b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 7720b57cec5SDimitry Andric /// corresponding value in the second parameter. 7730b57cec5SDimitry Andric /// 7740b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7750b57cec5SDimitry Andric /// 7760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7770b57cec5SDimitry Andric /// 7780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7790b57cec5SDimitry Andric /// 7800b57cec5SDimitry Andric /// \param __a 7810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7820b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7830b57cec5SDimitry Andric /// \param __b 7840b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7850b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7860b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7870b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 7880b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 7890b57cec5SDimitry Andric _mm_cmpge_sd(__m128d __a, __m128d __b) 7900b57cec5SDimitry Andric { 7910b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 7920b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7960b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7970b57cec5SDimitry Andric /// the value in the first parameter is "ordered" with respect to the 7980b57cec5SDimitry Andric /// corresponding value in the second parameter. 7990b57cec5SDimitry Andric /// 8000b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8010b57cec5SDimitry Andric /// of double-precision values are "ordered" with respect to each other if 8020b57cec5SDimitry Andric /// neither value is a NaN. 8030b57cec5SDimitry Andric /// 8040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8050b57cec5SDimitry Andric /// 8060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 8070b57cec5SDimitry Andric /// 8080b57cec5SDimitry Andric /// \param __a 8090b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8100b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8110b57cec5SDimitry Andric /// \param __b 8120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8130b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8140b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8150b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 8160b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8170b57cec5SDimitry Andric _mm_cmpord_sd(__m128d __a, __m128d __b) 8180b57cec5SDimitry Andric { 8190b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 8200b57cec5SDimitry Andric } 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8230b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8240b57cec5SDimitry Andric /// the value in the first parameter is "unordered" with respect to the 8250b57cec5SDimitry Andric /// corresponding value in the second parameter. 8260b57cec5SDimitry Andric /// 8270b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8280b57cec5SDimitry Andric /// of double-precision values are "unordered" with respect to each other if 8290b57cec5SDimitry Andric /// one or both values are NaN. 8300b57cec5SDimitry Andric /// 8310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8320b57cec5SDimitry Andric /// 8330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 8340b57cec5SDimitry Andric /// instruction. 8350b57cec5SDimitry Andric /// 8360b57cec5SDimitry Andric /// \param __a 8370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8380b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8390b57cec5SDimitry Andric /// \param __b 8400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8410b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8420b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8430b57cec5SDimitry Andric /// results. 
The upper 64 bits are copied from the upper 64 bits of \a __a. 8440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8450b57cec5SDimitry Andric _mm_cmpunord_sd(__m128d __a, __m128d __b) 8460b57cec5SDimitry Andric { 8470b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 8480b57cec5SDimitry Andric } 8490b57cec5SDimitry Andric 8500b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8510b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8520b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 8530b57cec5SDimitry Andric /// the second parameter. 8540b57cec5SDimitry Andric /// 8550b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8560b57cec5SDimitry Andric /// 8570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8580b57cec5SDimitry Andric /// 8590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 8600b57cec5SDimitry Andric /// 8610b57cec5SDimitry Andric /// \param __a 8620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8630b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8640b57cec5SDimitry Andric /// \param __b 8650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8660b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8670b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8680b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
8690b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8700b57cec5SDimitry Andric _mm_cmpneq_sd(__m128d __a, __m128d __b) 8710b57cec5SDimitry Andric { 8720b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8760b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8770b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 8780b57cec5SDimitry Andric /// value in the second parameter. 8790b57cec5SDimitry Andric /// 8800b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8810b57cec5SDimitry Andric /// 8820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8830b57cec5SDimitry Andric /// 8840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8850b57cec5SDimitry Andric /// 8860b57cec5SDimitry Andric /// \param __a 8870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8880b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8890b57cec5SDimitry Andric /// \param __b 8900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8910b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8920b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8930b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
8940b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 8950b57cec5SDimitry Andric _mm_cmpnlt_sd(__m128d __a, __m128d __b) 8960b57cec5SDimitry Andric { 8970b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 8980b57cec5SDimitry Andric } 8990b57cec5SDimitry Andric 9000b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9010b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9020b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 9030b57cec5SDimitry Andric /// corresponding value in the second parameter. 9040b57cec5SDimitry Andric /// 9050b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9060b57cec5SDimitry Andric /// 9070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9080b57cec5SDimitry Andric /// 9090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9100b57cec5SDimitry Andric /// 9110b57cec5SDimitry Andric /// \param __a 9120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9130b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9140b57cec5SDimitry Andric /// \param __b 9150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9160b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9170b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9180b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9190b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9200b57cec5SDimitry Andric _mm_cmpnle_sd(__m128d __a, __m128d __b) 9210b57cec5SDimitry Andric { 9220b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 9230b57cec5SDimitry Andric } 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9260b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9270b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 9280b57cec5SDimitry Andric /// value in the second parameter. 9290b57cec5SDimitry Andric /// 9300b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9310b57cec5SDimitry Andric /// 9320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9330b57cec5SDimitry Andric /// 9340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 9350b57cec5SDimitry Andric /// 9360b57cec5SDimitry Andric /// \param __a 9370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9380b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9390b57cec5SDimitry Andric /// \param __b 9400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9410b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9420b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9430b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9450b57cec5SDimitry Andric _mm_cmpngt_sd(__m128d __a, __m128d __b) 9460b57cec5SDimitry Andric { 9470b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 9480b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 9490b57cec5SDimitry Andric } 9500b57cec5SDimitry Andric 9510b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9520b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9530b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 9540b57cec5SDimitry Andric /// corresponding value in the second parameter. 9550b57cec5SDimitry Andric /// 9560b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9570b57cec5SDimitry Andric /// 9580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9590b57cec5SDimitry Andric /// 9600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9610b57cec5SDimitry Andric /// 9620b57cec5SDimitry Andric /// \param __a 9630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9640b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9650b57cec5SDimitry Andric /// \param __b 9660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9670b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9680b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9690b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
9700b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 9710b57cec5SDimitry Andric _mm_cmpnge_sd(__m128d __a, __m128d __b) 9720b57cec5SDimitry Andric { 9730b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 9740b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 9750b57cec5SDimitry Andric } 9760b57cec5SDimitry Andric 9770b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9780b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 9790b57cec5SDimitry Andric /// 9800b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9810b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9820b57cec5SDimitry Andric /// 9830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9840b57cec5SDimitry Andric /// 9850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9860b57cec5SDimitry Andric /// 9870b57cec5SDimitry Andric /// \param __a 9880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9900b57cec5SDimitry Andric /// \param __b 9910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9920b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9930b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9940b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
9950b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 9960b57cec5SDimitry Andric _mm_comieq_sd(__m128d __a, __m128d __b) 9970b57cec5SDimitry Andric { 9980b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10020b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10030b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 10040b57cec5SDimitry Andric /// the second parameter. 10050b57cec5SDimitry Andric /// 10060b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10070b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10080b57cec5SDimitry Andric /// 10090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10100b57cec5SDimitry Andric /// 10110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10120b57cec5SDimitry Andric /// 10130b57cec5SDimitry Andric /// \param __a 10140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10150b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10160b57cec5SDimitry Andric /// \param __b 10170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10180b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10190b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10200b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10210b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10220b57cec5SDimitry Andric _mm_comilt_sd(__m128d __a, __m128d __b) 10230b57cec5SDimitry Andric { 10240b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 10250b57cec5SDimitry Andric } 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10280b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10290b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 10300b57cec5SDimitry Andric /// corresponding value in the second parameter. 10310b57cec5SDimitry Andric /// 10320b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10330b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10340b57cec5SDimitry Andric /// 10350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10360b57cec5SDimitry Andric /// 10370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10380b57cec5SDimitry Andric /// 10390b57cec5SDimitry Andric /// \param __a 10400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10410b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10420b57cec5SDimitry Andric /// \param __b 10430b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10440b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10450b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10460b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10470b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10480b57cec5SDimitry Andric _mm_comile_sd(__m128d __a, __m128d __b) 10490b57cec5SDimitry Andric { 10500b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 10510b57cec5SDimitry Andric } 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10540b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10550b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 10560b57cec5SDimitry Andric /// in the second parameter. 10570b57cec5SDimitry Andric /// 10580b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10590b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10600b57cec5SDimitry Andric /// 10610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10620b57cec5SDimitry Andric /// 10630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10640b57cec5SDimitry Andric /// 10650b57cec5SDimitry Andric /// \param __a 10660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10670b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10680b57cec5SDimitry Andric /// \param __b 10690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10700b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10710b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10720b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10730b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 10740b57cec5SDimitry Andric _mm_comigt_sd(__m128d __a, __m128d __b) 10750b57cec5SDimitry Andric { 10760b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10800b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10810b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 10820b57cec5SDimitry Andric /// corresponding value in the second parameter. 10830b57cec5SDimitry Andric /// 10840b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10850b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10860b57cec5SDimitry Andric /// 10870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10880b57cec5SDimitry Andric /// 10890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10900b57cec5SDimitry Andric /// 10910b57cec5SDimitry Andric /// \param __a 10920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10930b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10940b57cec5SDimitry Andric /// \param __b 10950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10960b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10970b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10980b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
10990b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11000b57cec5SDimitry Andric _mm_comige_sd(__m128d __a, __m128d __b) 11010b57cec5SDimitry Andric { 11020b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 11030b57cec5SDimitry Andric } 11040b57cec5SDimitry Andric 11050b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11060b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11070b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 11080b57cec5SDimitry Andric /// the second parameter. 11090b57cec5SDimitry Andric /// 11100b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 11110b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 11120b57cec5SDimitry Andric /// 11130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11140b57cec5SDimitry Andric /// 11150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 11160b57cec5SDimitry Andric /// 11170b57cec5SDimitry Andric /// \param __a 11180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11190b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11200b57cec5SDimitry Andric /// \param __b 11210b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11220b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11230b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11240b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
11250b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11260b57cec5SDimitry Andric _mm_comineq_sd(__m128d __a, __m128d __b) 11270b57cec5SDimitry Andric { 11280b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 11290b57cec5SDimitry Andric } 11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11320b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. The 11330b57cec5SDimitry Andric /// comparison yields 0 for false, 1 for true. 11340b57cec5SDimitry Andric /// 11350b57cec5SDimitry Andric /// If either of the two lower double-precision values is NaN, 0 is returned. 11360b57cec5SDimitry Andric /// 11370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11380b57cec5SDimitry Andric /// 11390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11400b57cec5SDimitry Andric /// 11410b57cec5SDimitry Andric /// \param __a 11420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11430b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11440b57cec5SDimitry Andric /// \param __b 11450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11460b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11470b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11480b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
11490b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11500b57cec5SDimitry Andric _mm_ucomieq_sd(__m128d __a, __m128d __b) 11510b57cec5SDimitry Andric { 11520b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11560b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11570b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 11580b57cec5SDimitry Andric /// the second parameter. 11590b57cec5SDimitry Andric /// 11600b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11610b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11620b57cec5SDimitry Andric /// 11630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11640b57cec5SDimitry Andric /// 11650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11660b57cec5SDimitry Andric /// 11670b57cec5SDimitry Andric /// \param __a 11680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11690b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11700b57cec5SDimitry Andric /// \param __b 11710b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11720b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11730b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11740b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
11750b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 11760b57cec5SDimitry Andric _mm_ucomilt_sd(__m128d __a, __m128d __b) 11770b57cec5SDimitry Andric { 11780b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 11790b57cec5SDimitry Andric } 11800b57cec5SDimitry Andric 11810b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11820b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11830b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 11840b57cec5SDimitry Andric /// corresponding value in the second parameter. 11850b57cec5SDimitry Andric /// 11860b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11870b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11880b57cec5SDimitry Andric /// 11890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11900b57cec5SDimitry Andric /// 11910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11920b57cec5SDimitry Andric /// 11930b57cec5SDimitry Andric /// \param __a 11940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11950b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11960b57cec5SDimitry Andric /// \param __b 11970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11980b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11990b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12000b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
12010b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12020b57cec5SDimitry Andric _mm_ucomile_sd(__m128d __a, __m128d __b) 12030b57cec5SDimitry Andric { 12040b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 12050b57cec5SDimitry Andric } 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12080b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12090b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 12100b57cec5SDimitry Andric /// in the second parameter. 12110b57cec5SDimitry Andric /// 12120b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12130b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 12140b57cec5SDimitry Andric /// 12150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12160b57cec5SDimitry Andric /// 12170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12180b57cec5SDimitry Andric /// 12190b57cec5SDimitry Andric /// \param __a 12200b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12210b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12220b57cec5SDimitry Andric /// \param __b 12230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12240b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12250b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12260b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
12270b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12280b57cec5SDimitry Andric _mm_ucomigt_sd(__m128d __a, __m128d __b) 12290b57cec5SDimitry Andric { 12300b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 12310b57cec5SDimitry Andric } 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12340b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12350b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 12360b57cec5SDimitry Andric /// corresponding value in the second parameter. 12370b57cec5SDimitry Andric /// 12380b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 12390b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 12400b57cec5SDimitry Andric /// 12410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12420b57cec5SDimitry Andric /// 12430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12440b57cec5SDimitry Andric /// 12450b57cec5SDimitry Andric /// \param __a 12460b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12470b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12480b57cec5SDimitry Andric /// \param __b 12490b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12500b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12510b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12520b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
12530b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12540b57cec5SDimitry Andric _mm_ucomige_sd(__m128d __a, __m128d __b) 12550b57cec5SDimitry Andric { 12560b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 12570b57cec5SDimitry Andric } 12580b57cec5SDimitry Andric 12590b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12600b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12610b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 12620b57cec5SDimitry Andric /// the second parameter. 12630b57cec5SDimitry Andric /// 12640b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12650b57cec5SDimitry Andric /// double-precision values is NaN, 1 is returned. 12660b57cec5SDimitry Andric /// 12670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12680b57cec5SDimitry Andric /// 12690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12700b57cec5SDimitry Andric /// 12710b57cec5SDimitry Andric /// \param __a 12720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12730b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12740b57cec5SDimitry Andric /// \param __b 12750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12760b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12770b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two 12780b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
12790b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 12800b57cec5SDimitry Andric _mm_ucomineq_sd(__m128d __a, __m128d __b) 12810b57cec5SDimitry Andric { 12820b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12860b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 12870b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 12880b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 12890b57cec5SDimitry Andric /// 12900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12910b57cec5SDimitry Andric /// 12920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 12930b57cec5SDimitry Andric /// 12940b57cec5SDimitry Andric /// \param __a 12950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 12960b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 12970b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 12980b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 12990b57cec5SDimitry Andric _mm_cvtpd_ps(__m128d __a) 13000b57cec5SDimitry Andric { 13010b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 13020b57cec5SDimitry Andric } 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 13050b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 13060b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 13070b57cec5SDimitry Andric /// elements of the input vector are unused. 
13080b57cec5SDimitry Andric /// 13090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13100b57cec5SDimitry Andric /// 13110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 13120b57cec5SDimitry Andric /// 13130b57cec5SDimitry Andric /// \param __a 13140b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 13150b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 13160b57cec5SDimitry Andric /// upper two elements are unused. 13170b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 13180b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 13190b57cec5SDimitry Andric _mm_cvtps_pd(__m128 __a) 13200b57cec5SDimitry Andric { 13210b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 13220b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 13260b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 13270b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 13280b57cec5SDimitry Andric /// 13290b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 13300b57cec5SDimitry Andric /// 13310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13320b57cec5SDimitry Andric /// 13330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 13340b57cec5SDimitry Andric /// 13350b57cec5SDimitry Andric /// \param __a 13360b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 13370b57cec5SDimitry Andric /// converted to double-precision values. 
13380b57cec5SDimitry Andric /// 13390b57cec5SDimitry Andric /// The upper two elements are unused. 13400b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 13410b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 13420b57cec5SDimitry Andric _mm_cvtepi32_pd(__m128i __a) 13430b57cec5SDimitry Andric { 13440b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 13450b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 13460b57cec5SDimitry Andric } 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 13490b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 13500b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 13510b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 13520b57cec5SDimitry Andric /// 13530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13540b57cec5SDimitry Andric /// 13550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 13560b57cec5SDimitry Andric /// 13570b57cec5SDimitry Andric /// \param __a 13580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 13590b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 13600b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 13610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 13620b57cec5SDimitry Andric _mm_cvtpd_epi32(__m128d __a) 13630b57cec5SDimitry Andric { 13640b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 13680b57cec5SDimitry Andric /// into a 32-bit signed integer value. 
13690b57cec5SDimitry Andric /// 13700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13710b57cec5SDimitry Andric /// 13720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 13730b57cec5SDimitry Andric /// 13740b57cec5SDimitry Andric /// \param __a 13750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 13760b57cec5SDimitry Andric /// conversion. 13770b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 13780b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 13790b57cec5SDimitry Andric _mm_cvtsd_si32(__m128d __a) 13800b57cec5SDimitry Andric { 13810b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 13820b57cec5SDimitry Andric } 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 13850b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 13860b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 13870b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 13880b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 13890b57cec5SDimitry Andric /// 13900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13910b57cec5SDimitry Andric /// 13920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 13930b57cec5SDimitry Andric /// 13940b57cec5SDimitry Andric /// \param __a 13950b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 13960b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 13970b57cec5SDimitry Andric /// \param __b 13980b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision 13990b57cec5SDimitry Andric /// floating-point element is used in the conversion. 14000b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 14010b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 14020b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 14030b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 14040b57cec5SDimitry Andric _mm_cvtsd_ss(__m128 __a, __m128d __b) 14050b57cec5SDimitry Andric { 14060b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 14070b57cec5SDimitry Andric } 14080b57cec5SDimitry Andric 14090b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 14100b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 14110b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 14120b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 14130b57cec5SDimitry Andric /// 14140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14150b57cec5SDimitry Andric /// 14160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 14170b57cec5SDimitry Andric /// 14180b57cec5SDimitry Andric /// \param __a 14190b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 14200b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 14210b57cec5SDimitry Andric /// \param __b 14220b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 14230b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 14240b57cec5SDimitry Andric /// converted value from the second parameter. 
The upper 64 bits are copied 14250b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 14260b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 14270b57cec5SDimitry Andric _mm_cvtsi32_sd(__m128d __a, int __b) 14280b57cec5SDimitry Andric { 14290b57cec5SDimitry Andric __a[0] = __b; 14300b57cec5SDimitry Andric return __a; 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 14340b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 14350b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 14360b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 14370b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 14380b57cec5SDimitry Andric /// 14390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14400b57cec5SDimitry Andric /// 14410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 14420b57cec5SDimitry Andric /// 14430b57cec5SDimitry Andric /// \param __a 14440b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 14450b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 14460b57cec5SDimitry Andric /// \param __b 14470b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 14480b57cec5SDimitry Andric /// floating-point element is used in the conversion. 14490b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 14500b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 14510b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
14520b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 14530b57cec5SDimitry Andric _mm_cvtss_sd(__m128d __a, __m128 __b) 14540b57cec5SDimitry Andric { 14550b57cec5SDimitry Andric __a[0] = __b[0]; 14560b57cec5SDimitry Andric return __a; 14570b57cec5SDimitry Andric } 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14600b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14610b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. 14620b57cec5SDimitry Andric /// 14630b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14640b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. The upper 14650b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 14660b57cec5SDimitry Andric /// 14670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14680b57cec5SDimitry Andric /// 14690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 14700b57cec5SDimitry Andric /// instruction. 14710b57cec5SDimitry Andric /// 14720b57cec5SDimitry Andric /// \param __a 14730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14740b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 14750b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 
14760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 14770b57cec5SDimitry Andric _mm_cvttpd_epi32(__m128d __a) 14780b57cec5SDimitry Andric { 14790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 14800b57cec5SDimitry Andric } 14810b57cec5SDimitry Andric 14820b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 14830b57cec5SDimitry Andric /// signed integer value, truncating the result when it is inexact. 14840b57cec5SDimitry Andric /// 14850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14860b57cec5SDimitry Andric /// 14870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 14880b57cec5SDimitry Andric /// instruction. 14890b57cec5SDimitry Andric /// 14900b57cec5SDimitry Andric /// \param __a 14910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 14920b57cec5SDimitry Andric /// conversion. 14930b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 14940b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 14950b57cec5SDimitry Andric _mm_cvttsd_si32(__m128d __a) 14960b57cec5SDimitry Andric { 14970b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 15010b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 15020b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 15030b57cec5SDimitry Andric /// 15040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15050b57cec5SDimitry Andric /// 15060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 15070b57cec5SDimitry Andric /// 15080b57cec5SDimitry Andric /// \param __a 15090b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
15100b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 15110b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 15120b57cec5SDimitry Andric _mm_cvtpd_pi32(__m128d __a) 15130b57cec5SDimitry Andric { 15140b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 15150b57cec5SDimitry Andric } 15160b57cec5SDimitry Andric 15170b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 15180b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 15190b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 15200b57cec5SDimitry Andric /// 15210b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 15220b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. 15230b57cec5SDimitry Andric /// 15240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15250b57cec5SDimitry Andric /// 15260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 15270b57cec5SDimitry Andric /// 15280b57cec5SDimitry Andric /// \param __a 15290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 15300b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 15310b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 15320b57cec5SDimitry Andric _mm_cvttpd_pi32(__m128d __a) 15330b57cec5SDimitry Andric { 15340b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric 15370b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 15380b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 15390b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 
15400b57cec5SDimitry Andric /// 15410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15420b57cec5SDimitry Andric /// 15430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 15440b57cec5SDimitry Andric /// 15450b57cec5SDimitry Andric /// \param __a 15460b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 15470b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 15480b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX 15490b57cec5SDimitry Andric _mm_cvtpi32_pd(__m64 __a) 15500b57cec5SDimitry Andric { 15510b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 15520b57cec5SDimitry Andric } 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 15550b57cec5SDimitry Andric /// a double-precision floating-point value. 15560b57cec5SDimitry Andric /// 15570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15580b57cec5SDimitry Andric /// 15590b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 15600b57cec5SDimitry Andric /// 15610b57cec5SDimitry Andric /// \param __a 15620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 15630b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 15640b57cec5SDimitry Andric /// bits of \a __a. 15650b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS 15660b57cec5SDimitry Andric _mm_cvtsd_f64(__m128d __a) 15670b57cec5SDimitry Andric { 15680b57cec5SDimitry Andric return __a[0]; 15690b57cec5SDimitry Andric } 15700b57cec5SDimitry Andric 15710b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 15720b57cec5SDimitry Andric /// memory location. 
15730b57cec5SDimitry Andric /// 15740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15750b57cec5SDimitry Andric /// 15760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 15770b57cec5SDimitry Andric /// 15780b57cec5SDimitry Andric /// \param __dp 15790b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 15800b57cec5SDimitry Andric /// location has to be 16-byte aligned. 15810b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 15820b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 15830b57cec5SDimitry Andric _mm_load_pd(double const *__dp) 15840b57cec5SDimitry Andric { 1585480093f4SDimitry Andric return *(const __m128d*)__dp; 15860b57cec5SDimitry Andric } 15870b57cec5SDimitry Andric 15880b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 15890b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 15900b57cec5SDimitry Andric /// [2 x double]. 15910b57cec5SDimitry Andric /// 15920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15930b57cec5SDimitry Andric /// 15940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 15950b57cec5SDimitry Andric /// 15960b57cec5SDimitry Andric /// \param __dp 15970b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 15980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 15990b57cec5SDimitry Andric /// duplicated values. 
16000b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 16010b57cec5SDimitry Andric _mm_load1_pd(double const *__dp) 16020b57cec5SDimitry Andric { 16030b57cec5SDimitry Andric struct __mm_load1_pd_struct { 16040b57cec5SDimitry Andric double __u; 16050b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1606480093f4SDimitry Andric double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u; 16070b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __u }; 16080b57cec5SDimitry Andric } 16090b57cec5SDimitry Andric 16100b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 16110b57cec5SDimitry Andric 16120b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 16130b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 16140b57cec5SDimitry Andric /// 16150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16160b57cec5SDimitry Andric /// 16170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 16180b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 16190b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 16200b57cec5SDimitry Andric /// 16210b57cec5SDimitry Andric /// \param __dp 16220b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 16230b57cec5SDimitry Andric /// loaded in reverse order. 16240b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 16250b57cec5SDimitry Andric /// values. 
16260b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 16270b57cec5SDimitry Andric _mm_loadr_pd(double const *__dp) 16280b57cec5SDimitry Andric { 1629480093f4SDimitry Andric __m128d __u = *(const __m128d*)__dp; 16300b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 16310b57cec5SDimitry Andric } 16320b57cec5SDimitry Andric 16330b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 16340b57cec5SDimitry Andric /// unaligned memory location. 16350b57cec5SDimitry Andric /// 16360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16370b57cec5SDimitry Andric /// 16380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 16390b57cec5SDimitry Andric /// 16400b57cec5SDimitry Andric /// \param __dp 16410b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 16420b57cec5SDimitry Andric /// location does not have to be aligned. 16430b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 16440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 16450b57cec5SDimitry Andric _mm_loadu_pd(double const *__dp) 16460b57cec5SDimitry Andric { 16470b57cec5SDimitry Andric struct __loadu_pd { 16480b57cec5SDimitry Andric __m128d_u __v; 16490b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1650480093f4SDimitry Andric return ((const struct __loadu_pd*)__dp)->__v; 16510b57cec5SDimitry Andric } 16520b57cec5SDimitry Andric 16530b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 16540b57cec5SDimitry Andric /// vector and clears the upper element. 16550b57cec5SDimitry Andric /// 16560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16570b57cec5SDimitry Andric /// 16580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 
16590b57cec5SDimitry Andric /// 16600b57cec5SDimitry Andric /// \param __a 16610b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 16620b57cec5SDimitry Andric /// location does not have to be aligned. 16630b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 16640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 16650b57cec5SDimitry Andric _mm_loadu_si64(void const *__a) 16660b57cec5SDimitry Andric { 16670b57cec5SDimitry Andric struct __loadu_si64 { 16680b57cec5SDimitry Andric long long __v; 16690b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1670480093f4SDimitry Andric long long __u = ((const struct __loadu_si64*)__a)->__v; 16710b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){__u, 0LL}; 16720b57cec5SDimitry Andric } 16730b57cec5SDimitry Andric 16740b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 16750b57cec5SDimitry Andric /// vector and clears the upper element. 16760b57cec5SDimitry Andric /// 16770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16780b57cec5SDimitry Andric /// 16790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 16800b57cec5SDimitry Andric /// 16810b57cec5SDimitry Andric /// \param __a 16820b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 16830b57cec5SDimitry Andric /// location does not have to be aligned. 16840b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
16850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 16860b57cec5SDimitry Andric _mm_loadu_si32(void const *__a) 16870b57cec5SDimitry Andric { 16880b57cec5SDimitry Andric struct __loadu_si32 { 16890b57cec5SDimitry Andric int __v; 16900b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1691480093f4SDimitry Andric int __u = ((const struct __loadu_si32*)__a)->__v; 16920b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; 16930b57cec5SDimitry Andric } 16940b57cec5SDimitry Andric 16950b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 16960b57cec5SDimitry Andric /// vector and clears the upper element. 16970b57cec5SDimitry Andric /// 16980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16990b57cec5SDimitry Andric /// 17000b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 17010b57cec5SDimitry Andric /// 17020b57cec5SDimitry Andric /// \param __a 17030b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 17040b57cec5SDimitry Andric /// location does not have to be aligned. 17050b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 
17060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 17070b57cec5SDimitry Andric _mm_loadu_si16(void const *__a) 17080b57cec5SDimitry Andric { 17090b57cec5SDimitry Andric struct __loadu_si16 { 17100b57cec5SDimitry Andric short __v; 17110b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1712480093f4SDimitry Andric short __u = ((const struct __loadu_si16*)__a)->__v; 17130b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 17140b57cec5SDimitry Andric } 17150b57cec5SDimitry Andric 17160b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 17170b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 17180b57cec5SDimitry Andric /// 17190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17200b57cec5SDimitry Andric /// 17210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 17220b57cec5SDimitry Andric /// 17230b57cec5SDimitry Andric /// \param __dp 17240b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 17250b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 17260b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 
17270b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17280b57cec5SDimitry Andric _mm_load_sd(double const *__dp) 17290b57cec5SDimitry Andric { 17300b57cec5SDimitry Andric struct __mm_load_sd_struct { 17310b57cec5SDimitry Andric double __u; 17320b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1733480093f4SDimitry Andric double __u = ((const struct __mm_load_sd_struct*)__dp)->__u; 17340b57cec5SDimitry Andric return __extension__ (__m128d){ __u, 0 }; 17350b57cec5SDimitry Andric } 17360b57cec5SDimitry Andric 17370b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 17380b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 17390b57cec5SDimitry Andric /// bits of the first operand. 17400b57cec5SDimitry Andric /// 17410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17420b57cec5SDimitry Andric /// 17430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 17440b57cec5SDimitry Andric /// 17450b57cec5SDimitry Andric /// \param __a 17460b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17470b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 17480b57cec5SDimitry Andric /// \param __dp 17490b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17500b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 17510b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 17520b57cec5SDimitry Andric /// to be aligned. 17530b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
17540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17550b57cec5SDimitry Andric _mm_loadh_pd(__m128d __a, double const *__dp) 17560b57cec5SDimitry Andric { 17570b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 17580b57cec5SDimitry Andric double __u; 17590b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1760480093f4SDimitry Andric double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u; 17610b57cec5SDimitry Andric return __extension__ (__m128d){ __a[0], __u }; 17620b57cec5SDimitry Andric } 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 17650b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 17660b57cec5SDimitry Andric /// high-order bits of the first operand. 17670b57cec5SDimitry Andric /// 17680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17690b57cec5SDimitry Andric /// 17700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 17710b57cec5SDimitry Andric /// 17720b57cec5SDimitry Andric /// \param __a 17730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17740b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 17750b57cec5SDimitry Andric /// \param __dp 17760b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17770b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 17780b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 17790b57cec5SDimitry Andric /// be aligned. 17800b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
17810b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 17820b57cec5SDimitry Andric _mm_loadl_pd(__m128d __a, double const *__dp) 17830b57cec5SDimitry Andric { 17840b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 17850b57cec5SDimitry Andric double __u; 17860b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1787480093f4SDimitry Andric double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u; 17880b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __a[1] }; 17890b57cec5SDimitry Andric } 17900b57cec5SDimitry Andric 17910b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 17920b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 17930b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 17940b57cec5SDimitry Andric /// actually used. 17950b57cec5SDimitry Andric /// 17960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17970b57cec5SDimitry Andric /// 17980b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 17990b57cec5SDimitry Andric /// 18000b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 18010b57cec5SDimitry Andric /// content. 18020b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18030b57cec5SDimitry Andric _mm_undefined_pd(void) 18040b57cec5SDimitry Andric { 18050b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 18060b57cec5SDimitry Andric } 18070b57cec5SDimitry Andric 18080b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 18090b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 18100b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 
18110b57cec5SDimitry Andric /// 18120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18130b57cec5SDimitry Andric /// 18140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 18150b57cec5SDimitry Andric /// 18160b57cec5SDimitry Andric /// \param __w 18170b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18180b57cec5SDimitry Andric /// bits of the result. 18190b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 18200b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 18210b57cec5SDimitry Andric /// set to zero. 18220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18230b57cec5SDimitry Andric _mm_set_sd(double __w) 18240b57cec5SDimitry Andric { 18250b57cec5SDimitry Andric return __extension__ (__m128d){ __w, 0 }; 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric 18280b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 18290b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 18300b57cec5SDimitry Andric /// specified double-precision floating-point value. 18310b57cec5SDimitry Andric /// 18320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18330b57cec5SDimitry Andric /// 18340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 18350b57cec5SDimitry Andric /// 18360b57cec5SDimitry Andric /// \param __w 18370b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 18380b57cec5SDimitry Andric /// element of the result. 18390b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
18400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18410b57cec5SDimitry Andric _mm_set1_pd(double __w) 18420b57cec5SDimitry Andric { 18430b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __w }; 18440b57cec5SDimitry Andric } 18450b57cec5SDimitry Andric 18460b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 18470b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 18480b57cec5SDimitry Andric /// specified double-precision floating-point value. 18490b57cec5SDimitry Andric /// 18500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18510b57cec5SDimitry Andric /// 18520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 18530b57cec5SDimitry Andric /// 18540b57cec5SDimitry Andric /// \param __w 18550b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 18560b57cec5SDimitry Andric /// element of the result. 18570b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 18580b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18590b57cec5SDimitry Andric _mm_set_pd1(double __w) 18600b57cec5SDimitry Andric { 18610b57cec5SDimitry Andric return _mm_set1_pd(__w); 18620b57cec5SDimitry Andric } 18630b57cec5SDimitry Andric 18640b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18650b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 18660b57cec5SDimitry Andric /// 18670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18680b57cec5SDimitry Andric /// 18690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 
18700b57cec5SDimitry Andric /// 18710b57cec5SDimitry Andric /// \param __w 18720b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18730b57cec5SDimitry Andric /// bits of the result. 18740b57cec5SDimitry Andric /// \param __x 18750b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18760b57cec5SDimitry Andric /// bits of the result. 18770b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 18780b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 18790b57cec5SDimitry Andric _mm_set_pd(double __w, double __x) 18800b57cec5SDimitry Andric { 18810b57cec5SDimitry Andric return __extension__ (__m128d){ __x, __w }; 18820b57cec5SDimitry Andric } 18830b57cec5SDimitry Andric 18840b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 18850b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 18860b57cec5SDimitry Andric /// floating-point values. 18870b57cec5SDimitry Andric /// 18880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18890b57cec5SDimitry Andric /// 18900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 18910b57cec5SDimitry Andric /// 18920b57cec5SDimitry Andric /// \param __w 18930b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18940b57cec5SDimitry Andric /// bits of the result. 18950b57cec5SDimitry Andric /// \param __x 18960b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18970b57cec5SDimitry Andric /// bits of the result. 18980b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
18990b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 19000b57cec5SDimitry Andric _mm_setr_pd(double __w, double __x) 19010b57cec5SDimitry Andric { 19020b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __x }; 19030b57cec5SDimitry Andric } 19040b57cec5SDimitry Andric 19050b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 19060b57cec5SDimitry Andric /// initialized to zero. 19070b57cec5SDimitry Andric /// 19080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19090b57cec5SDimitry Andric /// 19100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 19110b57cec5SDimitry Andric /// 19120b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 19130b57cec5SDimitry Andric /// all elements set to zero. 19140b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 19150b57cec5SDimitry Andric _mm_setzero_pd(void) 19160b57cec5SDimitry Andric { 19170b57cec5SDimitry Andric return __extension__ (__m128d){ 0, 0 }; 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 19210b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 19220b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 19230b57cec5SDimitry Andric /// 19240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19250b57cec5SDimitry Andric /// 19260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 19270b57cec5SDimitry Andric /// 19280b57cec5SDimitry Andric /// \param __a 19290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 19300b57cec5SDimitry Andric /// upper 64 bits of the result. 
19310b57cec5SDimitry Andric /// \param __b 19320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 19330b57cec5SDimitry Andric /// lower 64 bits of the result. 19340b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 19350b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 19360b57cec5SDimitry Andric _mm_move_sd(__m128d __a, __m128d __b) 19370b57cec5SDimitry Andric { 19380b57cec5SDimitry Andric __a[0] = __b[0]; 19390b57cec5SDimitry Andric return __a; 19400b57cec5SDimitry Andric } 19410b57cec5SDimitry Andric 19420b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 19430b57cec5SDimitry Andric /// memory location. 19440b57cec5SDimitry Andric /// 19450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19460b57cec5SDimitry Andric /// 19470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 19480b57cec5SDimitry Andric /// 19490b57cec5SDimitry Andric /// \param __dp 19500b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 19510b57cec5SDimitry Andric /// \param __a 19520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 19530b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19540b57cec5SDimitry Andric _mm_store_sd(double *__dp, __m128d __a) 19550b57cec5SDimitry Andric { 19560b57cec5SDimitry Andric struct __mm_store_sd_struct { 19570b57cec5SDimitry Andric double __u; 19580b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19590b57cec5SDimitry Andric ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; 19600b57cec5SDimitry Andric } 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 19630b57cec5SDimitry Andric /// [2 x double] to a memory location. 
19640b57cec5SDimitry Andric /// 19650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19660b57cec5SDimitry Andric /// 19670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 19680b57cec5SDimitry Andric /// 19690b57cec5SDimitry Andric /// \param __dp 19700b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 19710b57cec5SDimitry Andric /// double-precision values. 19720b57cec5SDimitry Andric /// \param __a 19730b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 19740b57cec5SDimitry Andric /// moved. 19750b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19760b57cec5SDimitry Andric _mm_store_pd(double *__dp, __m128d __a) 19770b57cec5SDimitry Andric { 19780b57cec5SDimitry Andric *(__m128d*)__dp = __a; 19790b57cec5SDimitry Andric } 19800b57cec5SDimitry Andric 19810b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19820b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 19830b57cec5SDimitry Andric /// 19840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19850b57cec5SDimitry Andric /// 19860b57cec5SDimitry Andric /// This intrinsic corresponds to the 19870b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19880b57cec5SDimitry Andric /// 19890b57cec5SDimitry Andric /// \param __dp 19900b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19910b57cec5SDimitry Andric /// values. 19920b57cec5SDimitry Andric /// \param __a 19930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19940b57cec5SDimitry Andric /// of the values in \a __dp. 
19950b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 19960b57cec5SDimitry Andric _mm_store1_pd(double *__dp, __m128d __a) 19970b57cec5SDimitry Andric { 19980b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 19990b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 20000b57cec5SDimitry Andric } 20010b57cec5SDimitry Andric 20020b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 20030b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 20040b57cec5SDimitry Andric /// 20050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20060b57cec5SDimitry Andric /// 20070b57cec5SDimitry Andric /// This intrinsic corresponds to the 20080b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 20090b57cec5SDimitry Andric /// 20100b57cec5SDimitry Andric /// \param __dp 20110b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 20120b57cec5SDimitry Andric /// values. 20130b57cec5SDimitry Andric /// \param __a 20140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 20150b57cec5SDimitry Andric /// of the values in \a __dp. 20160b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20170b57cec5SDimitry Andric _mm_store_pd1(double *__dp, __m128d __a) 20180b57cec5SDimitry Andric { 20190b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 20200b57cec5SDimitry Andric } 20210b57cec5SDimitry Andric 20220b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 20230b57cec5SDimitry Andric /// location. 20240b57cec5SDimitry Andric /// 20250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20260b57cec5SDimitry Andric /// 20270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 
20280b57cec5SDimitry Andric /// 20290b57cec5SDimitry Andric /// \param __dp 20300b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 20310b57cec5SDimitry Andric /// location does not have to be aligned. 20320b57cec5SDimitry Andric /// \param __a 20330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 20340b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20350b57cec5SDimitry Andric _mm_storeu_pd(double *__dp, __m128d __a) 20360b57cec5SDimitry Andric { 20370b57cec5SDimitry Andric struct __storeu_pd { 20380b57cec5SDimitry Andric __m128d_u __v; 20390b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20400b57cec5SDimitry Andric ((struct __storeu_pd*)__dp)->__v = __a; 20410b57cec5SDimitry Andric } 20420b57cec5SDimitry Andric 20430b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 20440b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 20450b57cec5SDimitry Andric /// 20460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20470b57cec5SDimitry Andric /// 20480b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 20490b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 20500b57cec5SDimitry Andric /// 20510b57cec5SDimitry Andric /// \param __dp 20520b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 20530b57cec5SDimitry Andric /// double-precision values. 20540b57cec5SDimitry Andric /// \param __a 20550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 20560b57cec5SDimitry Andric /// stored. 
20570b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20580b57cec5SDimitry Andric _mm_storer_pd(double *__dp, __m128d __a) 20590b57cec5SDimitry Andric { 20600b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 20610b57cec5SDimitry Andric *(__m128d *)__dp = __a; 20620b57cec5SDimitry Andric } 20630b57cec5SDimitry Andric 20640b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 20650b57cec5SDimitry Andric /// memory location. 20660b57cec5SDimitry Andric /// 20670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20680b57cec5SDimitry Andric /// 20690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 20700b57cec5SDimitry Andric /// 20710b57cec5SDimitry Andric /// \param __dp 20720b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 20730b57cec5SDimitry Andric /// \param __a 20740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 20750b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20760b57cec5SDimitry Andric _mm_storeh_pd(double *__dp, __m128d __a) 20770b57cec5SDimitry Andric { 20780b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20790b57cec5SDimitry Andric double __u; 20800b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20810b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; 20820b57cec5SDimitry Andric } 20830b57cec5SDimitry Andric 20840b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 20850b57cec5SDimitry Andric /// memory location. 20860b57cec5SDimitry Andric /// 20870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20880b57cec5SDimitry Andric /// 20890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 
20900b57cec5SDimitry Andric /// 20910b57cec5SDimitry Andric /// \param __dp 20920b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 20930b57cec5SDimitry Andric /// \param __a 20940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 20950b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 20960b57cec5SDimitry Andric _mm_storel_pd(double *__dp, __m128d __a) 20970b57cec5SDimitry Andric { 20980b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20990b57cec5SDimitry Andric double __u; 21000b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 21010b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; 21020b57cec5SDimitry Andric } 21030b57cec5SDimitry Andric 21040b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 21050b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 21060b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 21070b57cec5SDimitry Andric /// 21080b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21090b57cec5SDimitry Andric /// 21100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21110b57cec5SDimitry Andric /// 21120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 21130b57cec5SDimitry Andric /// 21140b57cec5SDimitry Andric /// \param __a 21150b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 21160b57cec5SDimitry Andric /// \param __b 21170b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 21180b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 21190b57cec5SDimitry Andric /// parameters. 
21200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21210b57cec5SDimitry Andric _mm_add_epi8(__m128i __a, __m128i __b) 21220b57cec5SDimitry Andric { 21230b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 21240b57cec5SDimitry Andric } 21250b57cec5SDimitry Andric 21260b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 21270b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 21280b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 21290b57cec5SDimitry Andric /// 21300b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21310b57cec5SDimitry Andric /// 21320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21330b57cec5SDimitry Andric /// 21340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 21350b57cec5SDimitry Andric /// 21360b57cec5SDimitry Andric /// \param __a 21370b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 21380b57cec5SDimitry Andric /// \param __b 21390b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 21400b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 21410b57cec5SDimitry Andric /// parameters. 21420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21430b57cec5SDimitry Andric _mm_add_epi16(__m128i __a, __m128i __b) 21440b57cec5SDimitry Andric { 21450b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 21460b57cec5SDimitry Andric } 21470b57cec5SDimitry Andric 21480b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 21490b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 21500b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 
21510b57cec5SDimitry Andric /// 21520b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21530b57cec5SDimitry Andric /// 21540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21550b57cec5SDimitry Andric /// 21560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 21570b57cec5SDimitry Andric /// 21580b57cec5SDimitry Andric /// \param __a 21590b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 21600b57cec5SDimitry Andric /// \param __b 21610b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 21620b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 21630b57cec5SDimitry Andric /// parameters. 21640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 21650b57cec5SDimitry Andric _mm_add_epi32(__m128i __a, __m128i __b) 21660b57cec5SDimitry Andric { 21670b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 21680b57cec5SDimitry Andric } 21690b57cec5SDimitry Andric 21700b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 21710b57cec5SDimitry Andric /// lower 64 bits of the sum. 21720b57cec5SDimitry Andric /// 21730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21740b57cec5SDimitry Andric /// 21750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 21760b57cec5SDimitry Andric /// 21770b57cec5SDimitry Andric /// \param __a 21780b57cec5SDimitry Andric /// A 64-bit integer. 21790b57cec5SDimitry Andric /// \param __b 21800b57cec5SDimitry Andric /// A 64-bit integer. 21810b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
21820b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 21830b57cec5SDimitry Andric _mm_add_si64(__m64 __a, __m64 __b) 21840b57cec5SDimitry Andric { 21850b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 21860b57cec5SDimitry Andric } 21870b57cec5SDimitry Andric 21880b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 21890b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 21900b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 21910b57cec5SDimitry Andric /// 21920b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21930b57cec5SDimitry Andric /// 21940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21950b57cec5SDimitry Andric /// 21960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 21970b57cec5SDimitry Andric /// 21980b57cec5SDimitry Andric /// \param __a 21990b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 22000b57cec5SDimitry Andric /// \param __b 22010b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 22020b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 22030b57cec5SDimitry Andric /// parameters. 22040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22050b57cec5SDimitry Andric _mm_add_epi64(__m128i __a, __m128i __b) 22060b57cec5SDimitry Andric { 22070b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 22080b57cec5SDimitry Andric } 22090b57cec5SDimitry Andric 22100b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22110b57cec5SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element of 22120b57cec5SDimitry Andric /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are 22130b57cec5SDimitry Andric /// saturated to 0x7F. 
Negative sums less than 0x80 are saturated to 0x80. 22140b57cec5SDimitry Andric /// 22150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22160b57cec5SDimitry Andric /// 22170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 22180b57cec5SDimitry Andric /// 22190b57cec5SDimitry Andric /// \param __a 22200b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 22210b57cec5SDimitry Andric /// \param __b 22220b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 22230b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 22240b57cec5SDimitry Andric /// both parameters. 22250b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22260b57cec5SDimitry Andric _mm_adds_epi8(__m128i __a, __m128i __b) 22270b57cec5SDimitry Andric { 22280b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); 22290b57cec5SDimitry Andric } 22300b57cec5SDimitry Andric 22310b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22320b57cec5SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element of 22330b57cec5SDimitry Andric /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF 22340b57cec5SDimitry Andric /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 22350b57cec5SDimitry Andric /// 0x8000. 22360b57cec5SDimitry Andric /// 22370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22380b57cec5SDimitry Andric /// 22390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 22400b57cec5SDimitry Andric /// 22410b57cec5SDimitry Andric /// \param __a 22420b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22430b57cec5SDimitry Andric /// \param __b 22440b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
22450b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 22460b57cec5SDimitry Andric /// both parameters. 22470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22480b57cec5SDimitry Andric _mm_adds_epi16(__m128i __a, __m128i __b) 22490b57cec5SDimitry Andric { 22500b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); 22510b57cec5SDimitry Andric } 22520b57cec5SDimitry Andric 22530b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22540b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 22550b57cec5SDimitry Andric /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF 22560b57cec5SDimitry Andric /// are saturated to 0xFF. Negative sums are saturated to 0x00. 22570b57cec5SDimitry Andric /// 22580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22590b57cec5SDimitry Andric /// 22600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 22610b57cec5SDimitry Andric /// 22620b57cec5SDimitry Andric /// \param __a 22630b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22640b57cec5SDimitry Andric /// \param __b 22650b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22660b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 22670b57cec5SDimitry Andric /// of both parameters. 
22680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22690b57cec5SDimitry Andric _mm_adds_epu8(__m128i __a, __m128i __b) 22700b57cec5SDimitry Andric { 22710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); 22720b57cec5SDimitry Andric } 22730b57cec5SDimitry Andric 22740b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22750b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 22760b57cec5SDimitry Andric /// of a 128-bit result vector of [8 x i16]. Positive sums greater than 22770b57cec5SDimitry Andric /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. 22780b57cec5SDimitry Andric /// 22790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22800b57cec5SDimitry Andric /// 22810b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 22820b57cec5SDimitry Andric /// 22830b57cec5SDimitry Andric /// \param __a 22840b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22850b57cec5SDimitry Andric /// \param __b 22860b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22870b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 22880b57cec5SDimitry Andric /// of both parameters. 22890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 22900b57cec5SDimitry Andric _mm_adds_epu16(__m128i __a, __m128i __b) 22910b57cec5SDimitry Andric { 22920b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); 22930b57cec5SDimitry Andric } 22940b57cec5SDimitry Andric 2295480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 22960b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 22970b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
22980b57cec5SDimitry Andric /// 22990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23000b57cec5SDimitry Andric /// 23010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 23020b57cec5SDimitry Andric /// 23030b57cec5SDimitry Andric /// \param __a 23040b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23050b57cec5SDimitry Andric /// \param __b 23060b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23070b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 23080b57cec5SDimitry Andric /// averages of both parameters. 23090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23100b57cec5SDimitry Andric _mm_avg_epu8(__m128i __a, __m128i __b) 23110b57cec5SDimitry Andric { 23120b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 23130b57cec5SDimitry Andric } 23140b57cec5SDimitry Andric 2315480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 23160b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 23170b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 23180b57cec5SDimitry Andric /// 23190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23200b57cec5SDimitry Andric /// 23210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 23220b57cec5SDimitry Andric /// 23230b57cec5SDimitry Andric /// \param __a 23240b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23250b57cec5SDimitry Andric /// \param __b 23260b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23270b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 23280b57cec5SDimitry Andric /// averages of both parameters. 
23290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23300b57cec5SDimitry Andric _mm_avg_epu16(__m128i __a, __m128i __b) 23310b57cec5SDimitry Andric { 23320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 23330b57cec5SDimitry Andric } 23340b57cec5SDimitry Andric 23350b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 23360b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 23370b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 23380b57cec5SDimitry Andric /// [4 x i32] vector. 23390b57cec5SDimitry Andric /// 23400b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 23410b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 23420b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 23430b57cec5SDimitry Andric /// of the result. 23440b57cec5SDimitry Andric /// 23450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23460b57cec5SDimitry Andric /// 23470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 23480b57cec5SDimitry Andric /// 23490b57cec5SDimitry Andric /// \param __a 23500b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23510b57cec5SDimitry Andric /// \param __b 23520b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23530b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 23540b57cec5SDimitry Andric /// of both parameters. 
23550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23560b57cec5SDimitry Andric _mm_madd_epi16(__m128i __a, __m128i __b) 23570b57cec5SDimitry Andric { 23580b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 23590b57cec5SDimitry Andric } 23600b57cec5SDimitry Andric 23610b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 23620b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 23630b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 23640b57cec5SDimitry Andric /// 23650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23660b57cec5SDimitry Andric /// 23670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 23680b57cec5SDimitry Andric /// 23690b57cec5SDimitry Andric /// \param __a 23700b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23710b57cec5SDimitry Andric /// \param __b 23720b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23730b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 23740b57cec5SDimitry Andric /// each comparison. 23750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23760b57cec5SDimitry Andric _mm_max_epi16(__m128i __a, __m128i __b) 23770b57cec5SDimitry Andric { 23780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); 23790b57cec5SDimitry Andric } 23800b57cec5SDimitry Andric 23810b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 23820b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 23830b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
23840b57cec5SDimitry Andric /// 23850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23860b57cec5SDimitry Andric /// 23870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 23880b57cec5SDimitry Andric /// 23890b57cec5SDimitry Andric /// \param __a 23900b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23910b57cec5SDimitry Andric /// \param __b 23920b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23930b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 23940b57cec5SDimitry Andric /// each comparison. 23950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 23960b57cec5SDimitry Andric _mm_max_epu8(__m128i __a, __m128i __b) 23970b57cec5SDimitry Andric { 23980b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); 23990b57cec5SDimitry Andric } 24000b57cec5SDimitry Andric 24010b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 24020b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 24030b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 24040b57cec5SDimitry Andric /// 24050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24060b57cec5SDimitry Andric /// 24070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 24080b57cec5SDimitry Andric /// 24090b57cec5SDimitry Andric /// \param __a 24100b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24110b57cec5SDimitry Andric /// \param __b 24120b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24130b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 24140b57cec5SDimitry Andric /// each comparison. 
24150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24160b57cec5SDimitry Andric _mm_min_epi16(__m128i __a, __m128i __b) 24170b57cec5SDimitry Andric { 24180b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); 24190b57cec5SDimitry Andric } 24200b57cec5SDimitry Andric 24210b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 24220b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 24230b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 24240b57cec5SDimitry Andric /// 24250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24260b57cec5SDimitry Andric /// 24270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 24280b57cec5SDimitry Andric /// 24290b57cec5SDimitry Andric /// \param __a 24300b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 24310b57cec5SDimitry Andric /// \param __b 24320b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 24330b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 24340b57cec5SDimitry Andric /// each comparison. 24350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24360b57cec5SDimitry Andric _mm_min_epu8(__m128i __a, __m128i __b) 24370b57cec5SDimitry Andric { 24380b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); 24390b57cec5SDimitry Andric } 24400b57cec5SDimitry Andric 24410b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 24420b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 24430b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 
24440b57cec5SDimitry Andric /// 24450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24460b57cec5SDimitry Andric /// 24470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 24480b57cec5SDimitry Andric /// 24490b57cec5SDimitry Andric /// \param __a 24500b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24510b57cec5SDimitry Andric /// \param __b 24520b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24530b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 24540b57cec5SDimitry Andric /// each of the eight 32-bit products. 24550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24560b57cec5SDimitry Andric _mm_mulhi_epi16(__m128i __a, __m128i __b) 24570b57cec5SDimitry Andric { 24580b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 24590b57cec5SDimitry Andric } 24600b57cec5SDimitry Andric 24610b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 24620b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 24630b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 24640b57cec5SDimitry Andric /// 24650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24660b57cec5SDimitry Andric /// 24670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 24680b57cec5SDimitry Andric /// 24690b57cec5SDimitry Andric /// \param __a 24700b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 24710b57cec5SDimitry Andric /// \param __b 24720b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 24730b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 24740b57cec5SDimitry Andric /// of each of the eight 32-bit products. 
24750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24760b57cec5SDimitry Andric _mm_mulhi_epu16(__m128i __a, __m128i __b) 24770b57cec5SDimitry Andric { 24780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 24790b57cec5SDimitry Andric } 24800b57cec5SDimitry Andric 24810b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 24820b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 24830b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 24840b57cec5SDimitry Andric /// 24850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24860b57cec5SDimitry Andric /// 24870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 24880b57cec5SDimitry Andric /// 24890b57cec5SDimitry Andric /// \param __a 24900b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24910b57cec5SDimitry Andric /// \param __b 24920b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24930b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 24940b57cec5SDimitry Andric /// each of the eight 32-bit products. 24950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 24960b57cec5SDimitry Andric _mm_mullo_epi16(__m128i __a, __m128i __b) 24970b57cec5SDimitry Andric { 24980b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 24990b57cec5SDimitry Andric } 25000b57cec5SDimitry Andric 25010b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 25020b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 25030b57cec5SDimitry Andric /// product. 
25040b57cec5SDimitry Andric /// 25050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25060b57cec5SDimitry Andric /// 25070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 25080b57cec5SDimitry Andric /// 25090b57cec5SDimitry Andric /// \param __a 25100b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 25110b57cec5SDimitry Andric /// \param __b 25120b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 25130b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 25140b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 25150b57cec5SDimitry Andric _mm_mul_su32(__m64 __a, __m64 __b) 25160b57cec5SDimitry Andric { 25170b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 25180b57cec5SDimitry Andric } 25190b57cec5SDimitry Andric 25200b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 25210b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 25220b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 25230b57cec5SDimitry Andric /// 25240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25250b57cec5SDimitry Andric /// 25260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 25270b57cec5SDimitry Andric /// 25280b57cec5SDimitry Andric /// \param __a 25290b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 25300b57cec5SDimitry Andric /// \param __b 25310b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 25320b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 
25330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25340b57cec5SDimitry Andric _mm_mul_epu32(__m128i __a, __m128i __b) 25350b57cec5SDimitry Andric { 25360b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 25370b57cec5SDimitry Andric } 25380b57cec5SDimitry Andric 25390b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 25400b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 25410b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 25420b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 25430b57cec5SDimitry Andric /// [2 x i64] vector. 25440b57cec5SDimitry Andric /// 25450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25460b57cec5SDimitry Andric /// 25470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 25480b57cec5SDimitry Andric /// 25490b57cec5SDimitry Andric /// \param __a 25500b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25510b57cec5SDimitry Andric /// \param __b 25520b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25530b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 25540b57cec5SDimitry Andric /// differences between both operands. 25550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25560b57cec5SDimitry Andric _mm_sad_epu8(__m128i __a, __m128i __b) 25570b57cec5SDimitry Andric { 25580b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 25590b57cec5SDimitry Andric } 25600b57cec5SDimitry Andric 25610b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 
25620b57cec5SDimitry Andric /// 25630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25640b57cec5SDimitry Andric /// 25650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 25660b57cec5SDimitry Andric /// 25670b57cec5SDimitry Andric /// \param __a 25680b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25690b57cec5SDimitry Andric /// \param __b 25700b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25710b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25720b57cec5SDimitry Andric /// in the operands. 25730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25740b57cec5SDimitry Andric _mm_sub_epi8(__m128i __a, __m128i __b) 25750b57cec5SDimitry Andric { 25760b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 25770b57cec5SDimitry Andric } 25780b57cec5SDimitry Andric 25790b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 25800b57cec5SDimitry Andric /// 25810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25820b57cec5SDimitry Andric /// 25830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 25840b57cec5SDimitry Andric /// 25850b57cec5SDimitry Andric /// \param __a 25860b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25870b57cec5SDimitry Andric /// \param __b 25880b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25890b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25900b57cec5SDimitry Andric /// in the operands. 
25910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 25920b57cec5SDimitry Andric _mm_sub_epi16(__m128i __a, __m128i __b) 25930b57cec5SDimitry Andric { 25940b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 25950b57cec5SDimitry Andric } 25960b57cec5SDimitry Andric 25970b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 25980b57cec5SDimitry Andric /// 25990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26000b57cec5SDimitry Andric /// 26010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 26020b57cec5SDimitry Andric /// 26030b57cec5SDimitry Andric /// \param __a 26040b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26050b57cec5SDimitry Andric /// \param __b 26060b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26070b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26080b57cec5SDimitry Andric /// in the operands. 26090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26100b57cec5SDimitry Andric _mm_sub_epi32(__m128i __a, __m128i __b) 26110b57cec5SDimitry Andric { 26120b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 26130b57cec5SDimitry Andric } 26140b57cec5SDimitry Andric 26150b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 26160b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 26170b57cec5SDimitry Andric /// 26180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26190b57cec5SDimitry Andric /// 26200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 26210b57cec5SDimitry Andric /// 26220b57cec5SDimitry Andric /// \param __a 26230b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 
26240b57cec5SDimitry Andric /// \param __b 26250b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 26260b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 26270b57cec5SDimitry Andric /// the operands. 26280b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 26290b57cec5SDimitry Andric _mm_sub_si64(__m64 __a, __m64 __b) 26300b57cec5SDimitry Andric { 26310b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 26320b57cec5SDimitry Andric } 26330b57cec5SDimitry Andric 26340b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 26350b57cec5SDimitry Andric /// 26360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26370b57cec5SDimitry Andric /// 26380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 26390b57cec5SDimitry Andric /// 26400b57cec5SDimitry Andric /// \param __a 26410b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26420b57cec5SDimitry Andric /// \param __b 26430b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26450b57cec5SDimitry Andric /// in the operands. 26460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26470b57cec5SDimitry Andric _mm_sub_epi64(__m128i __a, __m128i __b) 26480b57cec5SDimitry Andric { 26490b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 26500b57cec5SDimitry Andric } 26510b57cec5SDimitry Andric 26520b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and 26530b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 
26540b57cec5SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences less 26550b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. 26560b57cec5SDimitry Andric /// 26570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26580b57cec5SDimitry Andric /// 26590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 26600b57cec5SDimitry Andric /// 26610b57cec5SDimitry Andric /// \param __a 26620b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26630b57cec5SDimitry Andric /// \param __b 26640b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26650b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26660b57cec5SDimitry Andric /// in the operands. 26670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26680b57cec5SDimitry Andric _mm_subs_epi8(__m128i __a, __m128i __b) 26690b57cec5SDimitry Andric { 26700b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); 26710b57cec5SDimitry Andric } 26720b57cec5SDimitry Andric 26730b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and 26740b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 26750b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 26760b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 26770b57cec5SDimitry Andric /// 26780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26790b57cec5SDimitry Andric /// 26800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 26810b57cec5SDimitry Andric /// 26820b57cec5SDimitry Andric /// \param __a 26830b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 
26840b57cec5SDimitry Andric /// \param __b 26850b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26860b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26870b57cec5SDimitry Andric /// in the operands. 26880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 26890b57cec5SDimitry Andric _mm_subs_epi16(__m128i __a, __m128i __b) 26900b57cec5SDimitry Andric { 26910b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); 26920b57cec5SDimitry Andric } 26930b57cec5SDimitry Andric 26940b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input 26950b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 26960b57cec5SDimitry Andric /// destination. Differences less than 0x00 are saturated to 0x00. 26970b57cec5SDimitry Andric /// 26980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26990b57cec5SDimitry Andric /// 27000b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 27010b57cec5SDimitry Andric /// 27020b57cec5SDimitry Andric /// \param __a 27030b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 27040b57cec5SDimitry Andric /// \param __b 27050b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 27060b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 27070b57cec5SDimitry Andric /// differences of the values in the operands. 
27080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27090b57cec5SDimitry Andric _mm_subs_epu8(__m128i __a, __m128i __b) 27100b57cec5SDimitry Andric { 27110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); 27120b57cec5SDimitry Andric } 27130b57cec5SDimitry Andric 27140b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input 27150b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 27160b57cec5SDimitry Andric /// destination. Differences less than 0x0000 are saturated to 0x0000. 27170b57cec5SDimitry Andric /// 27180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27190b57cec5SDimitry Andric /// 27200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 27210b57cec5SDimitry Andric /// 27220b57cec5SDimitry Andric /// \param __a 27230b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 27240b57cec5SDimitry Andric /// \param __b 27250b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 27260b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 27270b57cec5SDimitry Andric /// differences of the values in the operands. 27280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27290b57cec5SDimitry Andric _mm_subs_epu16(__m128i __a, __m128i __b) 27300b57cec5SDimitry Andric { 27310b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); 27320b57cec5SDimitry Andric } 27330b57cec5SDimitry Andric 27340b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 27350b57cec5SDimitry Andric /// 27360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27370b57cec5SDimitry Andric /// 27380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
27390b57cec5SDimitry Andric /// 27400b57cec5SDimitry Andric /// \param __a 27410b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27420b57cec5SDimitry Andric /// \param __b 27430b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 27450b57cec5SDimitry Andric /// in both operands. 27460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27470b57cec5SDimitry Andric _mm_and_si128(__m128i __a, __m128i __b) 27480b57cec5SDimitry Andric { 27490b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 27500b57cec5SDimitry Andric } 27510b57cec5SDimitry Andric 27520b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 27530b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 27540b57cec5SDimitry Andric /// 27550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27560b57cec5SDimitry Andric /// 27570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 27580b57cec5SDimitry Andric /// 27590b57cec5SDimitry Andric /// \param __a 27600b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. The one's complement 27610b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 27620b57cec5SDimitry Andric /// \param __b 27630b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 27640b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 27650b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 
27660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27670b57cec5SDimitry Andric _mm_andnot_si128(__m128i __a, __m128i __b) 27680b57cec5SDimitry Andric { 27690b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 27700b57cec5SDimitry Andric } 27710b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 27720b57cec5SDimitry Andric /// 27730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27740b57cec5SDimitry Andric /// 27750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 27760b57cec5SDimitry Andric /// 27770b57cec5SDimitry Andric /// \param __a 27780b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27790b57cec5SDimitry Andric /// \param __b 27800b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27810b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 27820b57cec5SDimitry Andric /// in both operands. 27830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 27840b57cec5SDimitry Andric _mm_or_si128(__m128i __a, __m128i __b) 27850b57cec5SDimitry Andric { 27860b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 27870b57cec5SDimitry Andric } 27880b57cec5SDimitry Andric 27890b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 27900b57cec5SDimitry Andric /// 27910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27920b57cec5SDimitry Andric /// 27930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 27940b57cec5SDimitry Andric /// 27950b57cec5SDimitry Andric /// \param __a 27960b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27970b57cec5SDimitry Andric /// \param __b 27980b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 
27990b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 28000b57cec5SDimitry Andric /// values in both operands. 28010b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28020b57cec5SDimitry Andric _mm_xor_si128(__m128i __a, __m128i __b) 28030b57cec5SDimitry Andric { 28040b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 28050b57cec5SDimitry Andric } 28060b57cec5SDimitry Andric 28070b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 28080b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 28090b57cec5SDimitry Andric /// 28100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28110b57cec5SDimitry Andric /// 28120b57cec5SDimitry Andric /// \code 28130b57cec5SDimitry Andric /// __m128i _mm_slli_si128(__m128i a, const int imm); 28140b57cec5SDimitry Andric /// \endcode 28150b57cec5SDimitry Andric /// 28160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction. 28170b57cec5SDimitry Andric /// 28180b57cec5SDimitry Andric /// \param a 28190b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28200b57cec5SDimitry Andric /// \param imm 28210b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to left-shift operand 28220b57cec5SDimitry Andric /// \a a. 28230b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted value. 
28240b57cec5SDimitry Andric #define _mm_slli_si128(a, imm) \ 2825*349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))) 28260b57cec5SDimitry Andric 28270b57cec5SDimitry Andric #define _mm_bslli_si128(a, imm) \ 2828*349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))) 28290b57cec5SDimitry Andric 28300b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 28310b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28320b57cec5SDimitry Andric /// 28330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28340b57cec5SDimitry Andric /// 28350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 28360b57cec5SDimitry Andric /// 28370b57cec5SDimitry Andric /// \param __a 28380b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28390b57cec5SDimitry Andric /// \param __count 28400b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 28410b57cec5SDimitry Andric /// in operand \a __a. 28420b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 28430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28440b57cec5SDimitry Andric _mm_slli_epi16(__m128i __a, int __count) 28450b57cec5SDimitry Andric { 28460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 28470b57cec5SDimitry Andric } 28480b57cec5SDimitry Andric 28490b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 28500b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 
28510b57cec5SDimitry Andric /// 28520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28530b57cec5SDimitry Andric /// 28540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 28550b57cec5SDimitry Andric /// 28560b57cec5SDimitry Andric /// \param __a 28570b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28580b57cec5SDimitry Andric /// \param __count 28590b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28600b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28610b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 28620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28630b57cec5SDimitry Andric _mm_sll_epi16(__m128i __a, __m128i __count) 28640b57cec5SDimitry Andric { 28650b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 28660b57cec5SDimitry Andric } 28670b57cec5SDimitry Andric 28680b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 28690b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28700b57cec5SDimitry Andric /// 28710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28720b57cec5SDimitry Andric /// 28730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 28740b57cec5SDimitry Andric /// 28750b57cec5SDimitry Andric /// \param __a 28760b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28770b57cec5SDimitry Andric /// \param __count 28780b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 28790b57cec5SDimitry Andric /// in operand \a __a. 28800b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
28810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 28820b57cec5SDimitry Andric _mm_slli_epi32(__m128i __a, int __count) 28830b57cec5SDimitry Andric { 28840b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 28850b57cec5SDimitry Andric } 28860b57cec5SDimitry Andric 28870b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 28880b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28890b57cec5SDimitry Andric /// 28900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28910b57cec5SDimitry Andric /// 28920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 28930b57cec5SDimitry Andric /// 28940b57cec5SDimitry Andric /// \param __a 28950b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28960b57cec5SDimitry Andric /// \param __count 28970b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28980b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28990b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 29000b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29010b57cec5SDimitry Andric _mm_sll_epi32(__m128i __a, __m128i __count) 29020b57cec5SDimitry Andric { 29030b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 29040b57cec5SDimitry Andric } 29050b57cec5SDimitry Andric 29060b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 29070b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 29080b57cec5SDimitry Andric /// 29090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29100b57cec5SDimitry Andric /// 29110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 
29120b57cec5SDimitry Andric /// 29130b57cec5SDimitry Andric /// \param __a 29140b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29150b57cec5SDimitry Andric /// \param __count 29160b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 29170b57cec5SDimitry Andric /// in operand \a __a. 29180b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 29190b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29200b57cec5SDimitry Andric _mm_slli_epi64(__m128i __a, int __count) 29210b57cec5SDimitry Andric { 29220b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 29230b57cec5SDimitry Andric } 29240b57cec5SDimitry Andric 29250b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 29260b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 29270b57cec5SDimitry Andric /// 29280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29290b57cec5SDimitry Andric /// 29300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 29310b57cec5SDimitry Andric /// 29320b57cec5SDimitry Andric /// \param __a 29330b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29340b57cec5SDimitry Andric /// \param __count 29350b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29360b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 29370b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
29380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29390b57cec5SDimitry Andric _mm_sll_epi64(__m128i __a, __m128i __count) 29400b57cec5SDimitry Andric { 29410b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 29420b57cec5SDimitry Andric } 29430b57cec5SDimitry Andric 29440b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 29450b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29460b57cec5SDimitry Andric /// bit of the initial value. 29470b57cec5SDimitry Andric /// 29480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29490b57cec5SDimitry Andric /// 29500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 29510b57cec5SDimitry Andric /// 29520b57cec5SDimitry Andric /// \param __a 29530b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29540b57cec5SDimitry Andric /// \param __count 29550b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29560b57cec5SDimitry Andric /// in operand \a __a. 29570b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29590b57cec5SDimitry Andric _mm_srai_epi16(__m128i __a, int __count) 29600b57cec5SDimitry Andric { 29610b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 29620b57cec5SDimitry Andric } 29630b57cec5SDimitry Andric 29640b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 29650b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29660b57cec5SDimitry Andric /// bit of the initial value. 
29670b57cec5SDimitry Andric /// 29680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29690b57cec5SDimitry Andric /// 29700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 29710b57cec5SDimitry Andric /// 29720b57cec5SDimitry Andric /// \param __a 29730b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29740b57cec5SDimitry Andric /// \param __count 29750b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29760b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29770b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29790b57cec5SDimitry Andric _mm_sra_epi16(__m128i __a, __m128i __count) 29800b57cec5SDimitry Andric { 29810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 29820b57cec5SDimitry Andric } 29830b57cec5SDimitry Andric 29840b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 29850b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29860b57cec5SDimitry Andric /// bit of the initial value. 29870b57cec5SDimitry Andric /// 29880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29890b57cec5SDimitry Andric /// 29900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 29910b57cec5SDimitry Andric /// 29920b57cec5SDimitry Andric /// \param __a 29930b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29940b57cec5SDimitry Andric /// \param __count 29950b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29960b57cec5SDimitry Andric /// in operand \a __a. 
29970b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 29980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 29990b57cec5SDimitry Andric _mm_srai_epi32(__m128i __a, int __count) 30000b57cec5SDimitry Andric { 30010b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 30020b57cec5SDimitry Andric } 30030b57cec5SDimitry Andric 30040b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 30050b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 30060b57cec5SDimitry Andric /// bit of the initial value. 30070b57cec5SDimitry Andric /// 30080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30090b57cec5SDimitry Andric /// 30100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 30110b57cec5SDimitry Andric /// 30120b57cec5SDimitry Andric /// \param __a 30130b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30140b57cec5SDimitry Andric /// \param __count 30150b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30160b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30170b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30190b57cec5SDimitry Andric _mm_sra_epi32(__m128i __a, __m128i __count) 30200b57cec5SDimitry Andric { 30210b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 30220b57cec5SDimitry Andric } 30230b57cec5SDimitry Andric 30240b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 30250b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_srli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to right-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm)                                                 \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a),          \
                                                (int)(imm)))

/* Alternate spelling of _mm_srli_si128; expands to the same builtin call. */
#define _mm_bsrli_si128(a, imm) _mm_srli_si128((a), (imm))

/// Right-shifts each of the 16-bit values in the 128-bit integer vector
///    operand by the specified number of bits. High-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector containing the source operand.
30560b57cec5SDimitry Andric /// \param __count 30570b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30580b57cec5SDimitry Andric /// in operand \a __a. 30590b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30610b57cec5SDimitry Andric _mm_srli_epi16(__m128i __a, int __count) 30620b57cec5SDimitry Andric { 30630b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 30640b57cec5SDimitry Andric } 30650b57cec5SDimitry Andric 30660b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 30670b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30680b57cec5SDimitry Andric /// 30690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30700b57cec5SDimitry Andric /// 30710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 30720b57cec5SDimitry Andric /// 30730b57cec5SDimitry Andric /// \param __a 30740b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30750b57cec5SDimitry Andric /// \param __count 30760b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30770b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30780b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
30790b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30800b57cec5SDimitry Andric _mm_srl_epi16(__m128i __a, __m128i __count) 30810b57cec5SDimitry Andric { 30820b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 30830b57cec5SDimitry Andric } 30840b57cec5SDimitry Andric 30850b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 30860b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30870b57cec5SDimitry Andric /// 30880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30890b57cec5SDimitry Andric /// 30900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 30910b57cec5SDimitry Andric /// 30920b57cec5SDimitry Andric /// \param __a 30930b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30940b57cec5SDimitry Andric /// \param __count 30950b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30960b57cec5SDimitry Andric /// in operand \a __a. 30970b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 30980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 30990b57cec5SDimitry Andric _mm_srli_epi32(__m128i __a, int __count) 31000b57cec5SDimitry Andric { 31010b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 31020b57cec5SDimitry Andric } 31030b57cec5SDimitry Andric 31040b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 31050b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31060b57cec5SDimitry Andric /// 31070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31080b57cec5SDimitry Andric /// 31090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 
31100b57cec5SDimitry Andric /// 31110b57cec5SDimitry Andric /// \param __a 31120b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31130b57cec5SDimitry Andric /// \param __count 31140b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 31150b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 31160b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 31170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31180b57cec5SDimitry Andric _mm_srl_epi32(__m128i __a, __m128i __count) 31190b57cec5SDimitry Andric { 31200b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 31210b57cec5SDimitry Andric } 31220b57cec5SDimitry Andric 31230b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 31240b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31250b57cec5SDimitry Andric /// 31260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31270b57cec5SDimitry Andric /// 31280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 31290b57cec5SDimitry Andric /// 31300b57cec5SDimitry Andric /// \param __a 31310b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31320b57cec5SDimitry Andric /// \param __count 31330b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 31340b57cec5SDimitry Andric /// in operand \a __a. 31350b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
31360b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31370b57cec5SDimitry Andric _mm_srli_epi64(__m128i __a, int __count) 31380b57cec5SDimitry Andric { 31390b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 31400b57cec5SDimitry Andric } 31410b57cec5SDimitry Andric 31420b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 31430b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 31440b57cec5SDimitry Andric /// 31450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31460b57cec5SDimitry Andric /// 31470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 31480b57cec5SDimitry Andric /// 31490b57cec5SDimitry Andric /// \param __a 31500b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 31510b57cec5SDimitry Andric /// \param __count 31520b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 31530b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 31540b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 31550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31560b57cec5SDimitry Andric _mm_srl_epi64(__m128i __a, __m128i __count) 31570b57cec5SDimitry Andric { 31580b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 31590b57cec5SDimitry Andric } 31600b57cec5SDimitry Andric 31610b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 31620b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF 31630b57cec5SDimitry Andric /// for true. 
31640b57cec5SDimitry Andric /// 31650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31660b57cec5SDimitry Andric /// 31670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 31680b57cec5SDimitry Andric /// 31690b57cec5SDimitry Andric /// \param __a 31700b57cec5SDimitry Andric /// A 128-bit integer vector. 31710b57cec5SDimitry Andric /// \param __b 31720b57cec5SDimitry Andric /// A 128-bit integer vector. 31730b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 31740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31750b57cec5SDimitry Andric _mm_cmpeq_epi8(__m128i __a, __m128i __b) 31760b57cec5SDimitry Andric { 31770b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 31780b57cec5SDimitry Andric } 31790b57cec5SDimitry Andric 31800b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 31810b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 31820b57cec5SDimitry Andric /// 0xFFFF for true. 31830b57cec5SDimitry Andric /// 31840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31850b57cec5SDimitry Andric /// 31860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 31870b57cec5SDimitry Andric /// 31880b57cec5SDimitry Andric /// \param __a 31890b57cec5SDimitry Andric /// A 128-bit integer vector. 31900b57cec5SDimitry Andric /// \param __b 31910b57cec5SDimitry Andric /// A 128-bit integer vector. 31920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
31930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 31940b57cec5SDimitry Andric _mm_cmpeq_epi16(__m128i __a, __m128i __b) 31950b57cec5SDimitry Andric { 31960b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 31970b57cec5SDimitry Andric } 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 32000b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 32010b57cec5SDimitry Andric /// 0xFFFFFFFF for true. 32020b57cec5SDimitry Andric /// 32030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32040b57cec5SDimitry Andric /// 32050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 32060b57cec5SDimitry Andric /// 32070b57cec5SDimitry Andric /// \param __a 32080b57cec5SDimitry Andric /// A 128-bit integer vector. 32090b57cec5SDimitry Andric /// \param __b 32100b57cec5SDimitry Andric /// A 128-bit integer vector. 32110b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32130b57cec5SDimitry Andric _mm_cmpeq_epi32(__m128i __a, __m128i __b) 32140b57cec5SDimitry Andric { 32150b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 32160b57cec5SDimitry Andric } 32170b57cec5SDimitry Andric 32180b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 32190b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 32200b57cec5SDimitry Andric /// greater than those in the second operand. Each comparison yields 0x0 for 32210b57cec5SDimitry Andric /// false, 0xFF for true. 
32220b57cec5SDimitry Andric /// 32230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32240b57cec5SDimitry Andric /// 32250b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 32260b57cec5SDimitry Andric /// 32270b57cec5SDimitry Andric /// \param __a 32280b57cec5SDimitry Andric /// A 128-bit integer vector. 32290b57cec5SDimitry Andric /// \param __b 32300b57cec5SDimitry Andric /// A 128-bit integer vector. 32310b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32330b57cec5SDimitry Andric _mm_cmpgt_epi8(__m128i __a, __m128i __b) 32340b57cec5SDimitry Andric { 32350b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 32360b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. */ 32370b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 32380b57cec5SDimitry Andric } 32390b57cec5SDimitry Andric 32400b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 32410b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32420b57cec5SDimitry Andric /// are greater than those in the second operand. 32430b57cec5SDimitry Andric /// 32440b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 32450b57cec5SDimitry Andric /// 32460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32470b57cec5SDimitry Andric /// 32480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 32490b57cec5SDimitry Andric /// 32500b57cec5SDimitry Andric /// \param __a 32510b57cec5SDimitry Andric /// A 128-bit integer vector. 32520b57cec5SDimitry Andric /// \param __b 32530b57cec5SDimitry Andric /// A 128-bit integer vector. 
32540b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32560b57cec5SDimitry Andric _mm_cmpgt_epi16(__m128i __a, __m128i __b) 32570b57cec5SDimitry Andric { 32580b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 32590b57cec5SDimitry Andric } 32600b57cec5SDimitry Andric 32610b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 32620b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32630b57cec5SDimitry Andric /// are greater than those in the second operand. 32640b57cec5SDimitry Andric /// 32650b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 32660b57cec5SDimitry Andric /// 32670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32680b57cec5SDimitry Andric /// 32690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 32700b57cec5SDimitry Andric /// 32710b57cec5SDimitry Andric /// \param __a 32720b57cec5SDimitry Andric /// A 128-bit integer vector. 32730b57cec5SDimitry Andric /// \param __b 32740b57cec5SDimitry Andric /// A 128-bit integer vector. 32750b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32770b57cec5SDimitry Andric _mm_cmpgt_epi32(__m128i __a, __m128i __b) 32780b57cec5SDimitry Andric { 32790b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 32800b57cec5SDimitry Andric } 32810b57cec5SDimitry Andric 32820b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 32830b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 32840b57cec5SDimitry Andric /// than those in the second operand. 
32850b57cec5SDimitry Andric /// 32860b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFF for true. 32870b57cec5SDimitry Andric /// 32880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32890b57cec5SDimitry Andric /// 32900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 32910b57cec5SDimitry Andric /// 32920b57cec5SDimitry Andric /// \param __a 32930b57cec5SDimitry Andric /// A 128-bit integer vector. 32940b57cec5SDimitry Andric /// \param __b 32950b57cec5SDimitry Andric /// A 128-bit integer vector. 32960b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 32970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 32980b57cec5SDimitry Andric _mm_cmplt_epi8(__m128i __a, __m128i __b) 32990b57cec5SDimitry Andric { 33000b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 33010b57cec5SDimitry Andric } 33020b57cec5SDimitry Andric 33030b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 33040b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 33050b57cec5SDimitry Andric /// are less than those in the second operand. 33060b57cec5SDimitry Andric /// 33070b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 33080b57cec5SDimitry Andric /// 33090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33100b57cec5SDimitry Andric /// 33110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 33120b57cec5SDimitry Andric /// 33130b57cec5SDimitry Andric /// \param __a 33140b57cec5SDimitry Andric /// A 128-bit integer vector. 33150b57cec5SDimitry Andric /// \param __b 33160b57cec5SDimitry Andric /// A 128-bit integer vector. 33170b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
33180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 33190b57cec5SDimitry Andric _mm_cmplt_epi16(__m128i __a, __m128i __b) 33200b57cec5SDimitry Andric { 33210b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 33220b57cec5SDimitry Andric } 33230b57cec5SDimitry Andric 33240b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 33250b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 33260b57cec5SDimitry Andric /// are less than those in the second operand. 33270b57cec5SDimitry Andric /// 33280b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 33290b57cec5SDimitry Andric /// 33300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33310b57cec5SDimitry Andric /// 33320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 33330b57cec5SDimitry Andric /// 33340b57cec5SDimitry Andric /// \param __a 33350b57cec5SDimitry Andric /// A 128-bit integer vector. 33360b57cec5SDimitry Andric /// \param __b 33370b57cec5SDimitry Andric /// A 128-bit integer vector. 33380b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 33390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 33400b57cec5SDimitry Andric _mm_cmplt_epi32(__m128i __a, __m128i __b) 33410b57cec5SDimitry Andric { 33420b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 33430b57cec5SDimitry Andric } 33440b57cec5SDimitry Andric 33450b57cec5SDimitry Andric #ifdef __x86_64__ 33460b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 33470b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 33480b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 33490b57cec5SDimitry Andric /// the upper element of the first operand. 
33500b57cec5SDimitry Andric /// 33510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33520b57cec5SDimitry Andric /// 33530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 33540b57cec5SDimitry Andric /// 33550b57cec5SDimitry Andric /// \param __a 33560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 33570b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 33580b57cec5SDimitry Andric /// \param __b 33590b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 33600b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 33610b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 33620b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 33630b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 33640b57cec5SDimitry Andric _mm_cvtsi64_sd(__m128d __a, long long __b) 33650b57cec5SDimitry Andric { 33660b57cec5SDimitry Andric __a[0] = __b; 33670b57cec5SDimitry Andric return __a; 33680b57cec5SDimitry Andric } 33690b57cec5SDimitry Andric 33700b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 33710b57cec5SDimitry Andric /// 64-bit signed integer value, according to the current rounding mode. 33720b57cec5SDimitry Andric /// 33730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33740b57cec5SDimitry Andric /// 33750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 33760b57cec5SDimitry Andric /// 33770b57cec5SDimitry Andric /// \param __a 33780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 33790b57cec5SDimitry Andric /// conversion. 33800b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 
33810b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 33820b57cec5SDimitry Andric _mm_cvtsd_si64(__m128d __a) 33830b57cec5SDimitry Andric { 33840b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 33850b57cec5SDimitry Andric } 33860b57cec5SDimitry Andric 33870b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 33880b57cec5SDimitry Andric /// 64-bit signed integer value, truncating the result when it is inexact. 33890b57cec5SDimitry Andric /// 33900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33910b57cec5SDimitry Andric /// 33920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 33930b57cec5SDimitry Andric /// instruction. 33940b57cec5SDimitry Andric /// 33950b57cec5SDimitry Andric /// \param __a 33960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 33970b57cec5SDimitry Andric /// conversion. 33980b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 33990b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 34000b57cec5SDimitry Andric _mm_cvttsd_si64(__m128d __a) 34010b57cec5SDimitry Andric { 34020b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 34030b57cec5SDimitry Andric } 34040b57cec5SDimitry Andric #endif 34050b57cec5SDimitry Andric 34060b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 34070b57cec5SDimitry Andric /// 34080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34090b57cec5SDimitry Andric /// 34100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 34110b57cec5SDimitry Andric /// 34120b57cec5SDimitry Andric /// \param __a 34130b57cec5SDimitry Andric /// A 128-bit integer vector. 34140b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 
34150b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 34160b57cec5SDimitry Andric _mm_cvtepi32_ps(__m128i __a) 34170b57cec5SDimitry Andric { 34180b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4si)__a, __v4sf); 34190b57cec5SDimitry Andric } 34200b57cec5SDimitry Andric 34210b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 34220b57cec5SDimitry Andric /// 34230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34240b57cec5SDimitry Andric /// 34250b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 34260b57cec5SDimitry Andric /// 34270b57cec5SDimitry Andric /// \param __a 34280b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 34290b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 34300b57cec5SDimitry Andric /// values. 34310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34320b57cec5SDimitry Andric _mm_cvtps_epi32(__m128 __a) 34330b57cec5SDimitry Andric { 34340b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 34350b57cec5SDimitry Andric } 34360b57cec5SDimitry Andric 34370b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32], 34380b57cec5SDimitry Andric /// truncating the result when it is inexact. 34390b57cec5SDimitry Andric /// 34400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34410b57cec5SDimitry Andric /// 34420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 34430b57cec5SDimitry Andric /// instruction. 34440b57cec5SDimitry Andric /// 34450b57cec5SDimitry Andric /// \param __a 34460b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 34470b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 
34480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34490b57cec5SDimitry Andric _mm_cvttps_epi32(__m128 __a) 34500b57cec5SDimitry Andric { 34510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 34520b57cec5SDimitry Andric } 34530b57cec5SDimitry Andric 34540b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 34550b57cec5SDimitry Andric /// operand and the remaining elements are zero. 34560b57cec5SDimitry Andric /// 34570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34580b57cec5SDimitry Andric /// 34590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 34600b57cec5SDimitry Andric /// 34610b57cec5SDimitry Andric /// \param __a 34620b57cec5SDimitry Andric /// A 32-bit signed integer operand. 34630b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 34640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34650b57cec5SDimitry Andric _mm_cvtsi32_si128(int __a) 34660b57cec5SDimitry Andric { 34670b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 }; 34680b57cec5SDimitry Andric } 34690b57cec5SDimitry Andric 34700b57cec5SDimitry Andric #ifdef __x86_64__ 34710b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 34720b57cec5SDimitry Andric /// operand and the upper element is zero. 34730b57cec5SDimitry Andric /// 34740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34750b57cec5SDimitry Andric /// 34760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 34770b57cec5SDimitry Andric /// 34780b57cec5SDimitry Andric /// \param __a 34790b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 34800b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 
34810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 34820b57cec5SDimitry Andric _mm_cvtsi64_si128(long long __a) 34830b57cec5SDimitry Andric { 34840b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __a, 0 }; 34850b57cec5SDimitry Andric } 34860b57cec5SDimitry Andric #endif 34870b57cec5SDimitry Andric 34880b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 34890b57cec5SDimitry Andric /// 32-bit signed integer value. 34900b57cec5SDimitry Andric /// 34910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34920b57cec5SDimitry Andric /// 34930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 34940b57cec5SDimitry Andric /// 34950b57cec5SDimitry Andric /// \param __a 34960b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 34970b57cec5SDimitry Andric /// destination. 34980b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 34990b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 35000b57cec5SDimitry Andric _mm_cvtsi128_si32(__m128i __a) 35010b57cec5SDimitry Andric { 35020b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 35030b57cec5SDimitry Andric return __b[0]; 35040b57cec5SDimitry Andric } 35050b57cec5SDimitry Andric 35060b57cec5SDimitry Andric #ifdef __x86_64__ 35070b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 35080b57cec5SDimitry Andric /// 64-bit signed integer value. 35090b57cec5SDimitry Andric /// 35100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35110b57cec5SDimitry Andric /// 35120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 35130b57cec5SDimitry Andric /// 35140b57cec5SDimitry Andric /// \param __a 35150b57cec5SDimitry Andric /// A vector of [2 x i64]. The least significant 64 bits are moved to the 35160b57cec5SDimitry Andric /// destination. 
35170b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 35180b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 35190b57cec5SDimitry Andric _mm_cvtsi128_si64(__m128i __a) 35200b57cec5SDimitry Andric { 35210b57cec5SDimitry Andric return __a[0]; 35220b57cec5SDimitry Andric } 35230b57cec5SDimitry Andric #endif 35240b57cec5SDimitry Andric 35250b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 35260b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 35270b57cec5SDimitry Andric /// 35280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35290b57cec5SDimitry Andric /// 35300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 35310b57cec5SDimitry Andric /// 35320b57cec5SDimitry Andric /// \param __p 35330b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 35340b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 35350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35360b57cec5SDimitry Andric _mm_load_si128(__m128i const *__p) 35370b57cec5SDimitry Andric { 35380b57cec5SDimitry Andric return *__p; 35390b57cec5SDimitry Andric } 35400b57cec5SDimitry Andric 35410b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 35420b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 35430b57cec5SDimitry Andric /// 35440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35450b57cec5SDimitry Andric /// 35460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 35470b57cec5SDimitry Andric /// 35480b57cec5SDimitry Andric /// \param __p 35490b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 35500b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 
35510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35520b57cec5SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) 35530b57cec5SDimitry Andric { 35540b57cec5SDimitry Andric struct __loadu_si128 { 35550b57cec5SDimitry Andric __m128i_u __v; 35560b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3557480093f4SDimitry Andric return ((const struct __loadu_si128*)__p)->__v; 35580b57cec5SDimitry Andric } 35590b57cec5SDimitry Andric 35600b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 35610b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 35620b57cec5SDimitry Andric /// 35630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35640b57cec5SDimitry Andric /// 35650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 35660b57cec5SDimitry Andric /// 35670b57cec5SDimitry Andric /// \param __p 35680b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 35690b57cec5SDimitry Andric /// the destination. 35700b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 35710b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 35720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35730b57cec5SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) 35740b57cec5SDimitry Andric { 35750b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 35760b57cec5SDimitry Andric long long __u; 35770b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3578480093f4SDimitry Andric return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0}; 35790b57cec5SDimitry Andric } 35800b57cec5SDimitry Andric 35810b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 
35820b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 35830b57cec5SDimitry Andric /// argument is required but the value is not actually used. 35840b57cec5SDimitry Andric /// 35850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35860b57cec5SDimitry Andric /// 35870b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 35880b57cec5SDimitry Andric /// 35890b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 35900b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 35910b57cec5SDimitry Andric _mm_undefined_si128(void) 35920b57cec5SDimitry Andric { 35930b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 35940b57cec5SDimitry Andric } 35950b57cec5SDimitry Andric 35960b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 35970b57cec5SDimitry Andric /// the specified 64-bit integer values. 35980b57cec5SDimitry Andric /// 35990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36000b57cec5SDimitry Andric /// 36010b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36020b57cec5SDimitry Andric /// instruction. 36030b57cec5SDimitry Andric /// 36040b57cec5SDimitry Andric /// \param __q1 36050b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 36060b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36070b57cec5SDimitry Andric /// \param __q0 36080b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 36090b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36100b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 36110b57cec5SDimitry Andric /// provided in the operands. 
36120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36130b57cec5SDimitry Andric _mm_set_epi64x(long long __q1, long long __q0) 36140b57cec5SDimitry Andric { 36150b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 36160b57cec5SDimitry Andric } 36170b57cec5SDimitry Andric 36180b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 36190b57cec5SDimitry Andric /// the specified 64-bit integer values. 36200b57cec5SDimitry Andric /// 36210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36220b57cec5SDimitry Andric /// 36230b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36240b57cec5SDimitry Andric /// instruction. 36250b57cec5SDimitry Andric /// 36260b57cec5SDimitry Andric /// \param __q1 36270b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 36280b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36290b57cec5SDimitry Andric /// \param __q0 36300b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 36310b57cec5SDimitry Andric /// destination vector of [2 x i64]. 36320b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 36330b57cec5SDimitry Andric /// provided in the operands. 36340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36350b57cec5SDimitry Andric _mm_set_epi64(__m64 __q1, __m64 __q0) 36360b57cec5SDimitry Andric { 36370b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 36380b57cec5SDimitry Andric } 36390b57cec5SDimitry Andric 36400b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 36410b57cec5SDimitry Andric /// the specified 32-bit integer values. 
36420b57cec5SDimitry Andric /// 36430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36440b57cec5SDimitry Andric /// 36450b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36460b57cec5SDimitry Andric /// instruction. 36470b57cec5SDimitry Andric /// 36480b57cec5SDimitry Andric /// \param __i3 36490b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 36500b57cec5SDimitry Andric /// destination vector. 36510b57cec5SDimitry Andric /// \param __i2 36520b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 36530b57cec5SDimitry Andric /// vector. 36540b57cec5SDimitry Andric /// \param __i1 36550b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 36560b57cec5SDimitry Andric /// vector. 36570b57cec5SDimitry Andric /// \param __i0 36580b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 36590b57cec5SDimitry Andric /// vector. 36600b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 36610b57cec5SDimitry Andric /// provided in the operands. 36620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 36630b57cec5SDimitry Andric _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) 36640b57cec5SDimitry Andric { 36650b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; 36660b57cec5SDimitry Andric } 36670b57cec5SDimitry Andric 36680b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 36690b57cec5SDimitry Andric /// the specified 16-bit integer values. 
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic is a utility function and does not correspond to a specific
///    instruction.
///
/// \param __w7
///    A 16-bit integer value used to initialize bits [127:112] of the
///    destination vector.
/// \param __w6
///    A 16-bit integer value used to initialize bits [111:96] of the
///    destination vector.
/// \param __w5
///    A 16-bit integer value used to initialize bits [95:80] of the destination
///    vector.
/// \param __w4
///    A 16-bit integer value used to initialize bits [79:64] of the destination
///    vector.
/// \param __w3
///    A 16-bit integer value used to initialize bits [63:48] of the destination
///    vector.
/// \param __w2
///    A 16-bit integer value used to initialize bits [47:32] of the destination
///    vector.
/// \param __w1
///    A 16-bit integer value used to initialize bits [31:16] of the destination
///    vector.
/// \param __w0
///    A 16-bit integer value used to initialize bits [15:0] of the destination
///    vector.
/// \returns An initialized 128-bit vector of [8 x i16] containing the values
///    provided in the operands.
37020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37030b57cec5SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) 37040b57cec5SDimitry Andric { 37050b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; 37060b57cec5SDimitry Andric } 37070b57cec5SDimitry Andric 37080b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 37090b57cec5SDimitry Andric /// the specified 8-bit integer values. 37100b57cec5SDimitry Andric /// 37110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37120b57cec5SDimitry Andric /// 37130b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37140b57cec5SDimitry Andric /// instruction. 37150b57cec5SDimitry Andric /// 37160b57cec5SDimitry Andric /// \param __b15 37170b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 37180b57cec5SDimitry Andric /// \param __b14 37190b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 37200b57cec5SDimitry Andric /// \param __b13 37210b57cec5SDimitry Andric /// Initializes bits [111:104] of the destination vector. 37220b57cec5SDimitry Andric /// \param __b12 37230b57cec5SDimitry Andric /// Initializes bits [103:96] of the destination vector. 37240b57cec5SDimitry Andric /// \param __b11 37250b57cec5SDimitry Andric /// Initializes bits [95:88] of the destination vector. 37260b57cec5SDimitry Andric /// \param __b10 37270b57cec5SDimitry Andric /// Initializes bits [87:80] of the destination vector. 37280b57cec5SDimitry Andric /// \param __b9 37290b57cec5SDimitry Andric /// Initializes bits [79:72] of the destination vector. 37300b57cec5SDimitry Andric /// \param __b8 37310b57cec5SDimitry Andric /// Initializes bits [71:64] of the destination vector. 
37320b57cec5SDimitry Andric /// \param __b7 37330b57cec5SDimitry Andric /// Initializes bits [63:56] of the destination vector. 37340b57cec5SDimitry Andric /// \param __b6 37350b57cec5SDimitry Andric /// Initializes bits [55:48] of the destination vector. 37360b57cec5SDimitry Andric /// \param __b5 37370b57cec5SDimitry Andric /// Initializes bits [47:40] of the destination vector. 37380b57cec5SDimitry Andric /// \param __b4 37390b57cec5SDimitry Andric /// Initializes bits [39:32] of the destination vector. 37400b57cec5SDimitry Andric /// \param __b3 37410b57cec5SDimitry Andric /// Initializes bits [31:24] of the destination vector. 37420b57cec5SDimitry Andric /// \param __b2 37430b57cec5SDimitry Andric /// Initializes bits [23:16] of the destination vector. 37440b57cec5SDimitry Andric /// \param __b1 37450b57cec5SDimitry Andric /// Initializes bits [15:8] of the destination vector. 37460b57cec5SDimitry Andric /// \param __b0 37470b57cec5SDimitry Andric /// Initializes bits [7:0] of the destination vector. 37480b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values 37490b57cec5SDimitry Andric /// provided in the operands. 37500b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37510b57cec5SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) 37520b57cec5SDimitry Andric { 37530b57cec5SDimitry Andric return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; 37540b57cec5SDimitry Andric } 37550b57cec5SDimitry Andric 37560b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 37570b57cec5SDimitry Andric /// specified 64-bit integer value. 
37580b57cec5SDimitry Andric /// 37590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37600b57cec5SDimitry Andric /// 37610b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37620b57cec5SDimitry Andric /// instruction. 37630b57cec5SDimitry Andric /// 37640b57cec5SDimitry Andric /// \param __q 37650b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 37660b57cec5SDimitry Andric /// vector. 37670b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 37680b57cec5SDimitry Andric /// elements containing the value provided in the operand. 37690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37700b57cec5SDimitry Andric _mm_set1_epi64x(long long __q) 37710b57cec5SDimitry Andric { 37720b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 37730b57cec5SDimitry Andric } 37740b57cec5SDimitry Andric 37750b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 37760b57cec5SDimitry Andric /// specified 64-bit value. 37770b57cec5SDimitry Andric /// 37780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37790b57cec5SDimitry Andric /// 37800b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37810b57cec5SDimitry Andric /// instruction. 37820b57cec5SDimitry Andric /// 37830b57cec5SDimitry Andric /// \param __q 37840b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 37850b57cec5SDimitry Andric /// vector. 37860b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 37870b57cec5SDimitry Andric /// containing the value provided in the operand. 
37880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 37890b57cec5SDimitry Andric _mm_set1_epi64(__m64 __q) 37900b57cec5SDimitry Andric { 37910b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 37920b57cec5SDimitry Andric } 37930b57cec5SDimitry Andric 37940b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 37950b57cec5SDimitry Andric /// specified 32-bit value. 37960b57cec5SDimitry Andric /// 37970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37980b57cec5SDimitry Andric /// 37990b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38000b57cec5SDimitry Andric /// instruction. 38010b57cec5SDimitry Andric /// 38020b57cec5SDimitry Andric /// \param __i 38030b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 38040b57cec5SDimitry Andric /// vector. 38050b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 38060b57cec5SDimitry Andric /// containing the value provided in the operand. 38070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38080b57cec5SDimitry Andric _mm_set1_epi32(int __i) 38090b57cec5SDimitry Andric { 38100b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 38110b57cec5SDimitry Andric } 38120b57cec5SDimitry Andric 38130b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 38140b57cec5SDimitry Andric /// specified 16-bit value. 38150b57cec5SDimitry Andric /// 38160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38170b57cec5SDimitry Andric /// 38180b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38190b57cec5SDimitry Andric /// instruction. 
38200b57cec5SDimitry Andric /// 38210b57cec5SDimitry Andric /// \param __w 38220b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 38230b57cec5SDimitry Andric /// vector. 38240b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 38250b57cec5SDimitry Andric /// containing the value provided in the operand. 38260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38270b57cec5SDimitry Andric _mm_set1_epi16(short __w) 38280b57cec5SDimitry Andric { 38290b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 38300b57cec5SDimitry Andric } 38310b57cec5SDimitry Andric 38320b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 38330b57cec5SDimitry Andric /// specified 8-bit value. 38340b57cec5SDimitry Andric /// 38350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38360b57cec5SDimitry Andric /// 38370b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38380b57cec5SDimitry Andric /// instruction. 38390b57cec5SDimitry Andric /// 38400b57cec5SDimitry Andric /// \param __b 38410b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 38420b57cec5SDimitry Andric /// vector. 38430b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 38440b57cec5SDimitry Andric /// containing the value provided in the operand. 
38450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38460b57cec5SDimitry Andric _mm_set1_epi8(char __b) 38470b57cec5SDimitry Andric { 38480b57cec5SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); 38490b57cec5SDimitry Andric } 38500b57cec5SDimitry Andric 38510b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38520b57cec5SDimitry Andric /// with the specified 64-bit integral values. 38530b57cec5SDimitry Andric /// 38540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38550b57cec5SDimitry Andric /// 38560b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 38570b57cec5SDimitry Andric /// 38580b57cec5SDimitry Andric /// \param __q0 38590b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 38600b57cec5SDimitry Andric /// result. 38610b57cec5SDimitry Andric /// \param __q1 38620b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 38630b57cec5SDimitry Andric /// result. 38640b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 38650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38660b57cec5SDimitry Andric _mm_setr_epi64(__m64 __q0, __m64 __q1) 38670b57cec5SDimitry Andric { 38680b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 38690b57cec5SDimitry Andric } 38700b57cec5SDimitry Andric 38710b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38720b57cec5SDimitry Andric /// with the specified 32-bit integral values. 38730b57cec5SDimitry Andric /// 38740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38750b57cec5SDimitry Andric /// 38760b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38770b57cec5SDimitry Andric /// instruction. 
38780b57cec5SDimitry Andric /// 38790b57cec5SDimitry Andric /// \param __i0 38800b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 38810b57cec5SDimitry Andric /// \param __i1 38820b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 38830b57cec5SDimitry Andric /// \param __i2 38840b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 38850b57cec5SDimitry Andric /// \param __i3 38860b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 38870b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 38880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 38890b57cec5SDimitry Andric _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) 38900b57cec5SDimitry Andric { 38910b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 38920b57cec5SDimitry Andric } 38930b57cec5SDimitry Andric 38940b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38950b57cec5SDimitry Andric /// with the specified 16-bit integral values. 38960b57cec5SDimitry Andric /// 38970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38980b57cec5SDimitry Andric /// 38990b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 39000b57cec5SDimitry Andric /// instruction. 39010b57cec5SDimitry Andric /// 39020b57cec5SDimitry Andric /// \param __w0 39030b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 39040b57cec5SDimitry Andric /// \param __w1 39050b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 39060b57cec5SDimitry Andric /// \param __w2 39070b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 
39080b57cec5SDimitry Andric /// \param __w3 39090b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 39100b57cec5SDimitry Andric /// \param __w4 39110b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 39120b57cec5SDimitry Andric /// \param __w5 39130b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 39140b57cec5SDimitry Andric /// \param __w6 39150b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 39160b57cec5SDimitry Andric /// \param __w7 39170b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 39180b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 39190b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39200b57cec5SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) 39210b57cec5SDimitry Andric { 39220b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 39230b57cec5SDimitry Andric } 39240b57cec5SDimitry Andric 39250b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 39260b57cec5SDimitry Andric /// with the specified 8-bit integral values. 39270b57cec5SDimitry Andric /// 39280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39290b57cec5SDimitry Andric /// 39300b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 39310b57cec5SDimitry Andric /// instruction. 39320b57cec5SDimitry Andric /// 39330b57cec5SDimitry Andric /// \param __b0 39340b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [7:0] of the result. 39350b57cec5SDimitry Andric /// \param __b1 39360b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [15:8] of the result. 
/// \param __b2
///    An 8-bit integral value used to initialize bits [23:16] of the result.
/// \param __b3
///    An 8-bit integral value used to initialize bits [31:24] of the result.
/// \param __b4
///    An 8-bit integral value used to initialize bits [39:32] of the result.
/// \param __b5
///    An 8-bit integral value used to initialize bits [47:40] of the result.
/// \param __b6
///    An 8-bit integral value used to initialize bits [55:48] of the result.
/// \param __b7
///    An 8-bit integral value used to initialize bits [63:56] of the result.
/// \param __b8
///    An 8-bit integral value used to initialize bits [71:64] of the result.
/// \param __b9
///    An 8-bit integral value used to initialize bits [79:72] of the result.
/// \param __b10
///    An 8-bit integral value used to initialize bits [87:80] of the result.
/// \param __b11
///    An 8-bit integral value used to initialize bits [95:88] of the result.
/// \param __b12
///    An 8-bit integral value used to initialize bits [103:96] of the result.
/// \param __b13
///    An 8-bit integral value used to initialize bits [111:104] of the result.
/// \param __b14
///    An 8-bit integral value used to initialize bits [119:112] of the result.
39630b57cec5SDimitry Andric /// \param __b15 39640b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 39650b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 39660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39670b57cec5SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) 39680b57cec5SDimitry Andric { 39690b57cec5SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 39700b57cec5SDimitry Andric } 39710b57cec5SDimitry Andric 39720b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 39730b57cec5SDimitry Andric /// 39740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39750b57cec5SDimitry Andric /// 39760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 39770b57cec5SDimitry Andric /// 39780b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 39790b57cec5SDimitry Andric /// zero. 39800b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 39810b57cec5SDimitry Andric _mm_setzero_si128(void) 39820b57cec5SDimitry Andric { 39830b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ 0LL, 0LL }; 39840b57cec5SDimitry Andric } 39850b57cec5SDimitry Andric 39860b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 39870b57cec5SDimitry Andric /// 128-bit boundary. 39880b57cec5SDimitry Andric /// 39890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39900b57cec5SDimitry Andric /// 39910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 
39920b57cec5SDimitry Andric /// 39930b57cec5SDimitry Andric /// \param __p 39940b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 39950b57cec5SDimitry Andric /// values. 39960b57cec5SDimitry Andric /// \param __b 39970b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 39980b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 39990b57cec5SDimitry Andric _mm_store_si128(__m128i *__p, __m128i __b) 40000b57cec5SDimitry Andric { 40010b57cec5SDimitry Andric *__p = __b; 40020b57cec5SDimitry Andric } 40030b57cec5SDimitry Andric 40040b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 40050b57cec5SDimitry Andric /// 40060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40070b57cec5SDimitry Andric /// 40080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 40090b57cec5SDimitry Andric /// 40100b57cec5SDimitry Andric /// \param __p 40110b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 40120b57cec5SDimitry Andric /// \param __b 40130b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 40140b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40150b57cec5SDimitry Andric _mm_storeu_si128(__m128i_u *__p, __m128i __b) 40160b57cec5SDimitry Andric { 40170b57cec5SDimitry Andric struct __storeu_si128 { 40180b57cec5SDimitry Andric __m128i_u __v; 40190b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40200b57cec5SDimitry Andric ((struct __storeu_si128*)__p)->__v = __b; 40210b57cec5SDimitry Andric } 40220b57cec5SDimitry Andric 40230b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 40240b57cec5SDimitry Andric /// vector. 
40250b57cec5SDimitry Andric /// 40260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40270b57cec5SDimitry Andric /// 40280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 40290b57cec5SDimitry Andric /// 40300b57cec5SDimitry Andric /// \param __p 40310b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 4032e8d8bef9SDimitry Andric /// location does not have to be aligned. 40330b57cec5SDimitry Andric /// \param __b 40340b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 40350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40360b57cec5SDimitry Andric _mm_storeu_si64(void *__p, __m128i __b) 40370b57cec5SDimitry Andric { 40380b57cec5SDimitry Andric struct __storeu_si64 { 40390b57cec5SDimitry Andric long long __v; 40400b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40410b57cec5SDimitry Andric ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0]; 40420b57cec5SDimitry Andric } 40430b57cec5SDimitry Andric 40440b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 40450b57cec5SDimitry Andric /// vector. 40460b57cec5SDimitry Andric /// 40470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40480b57cec5SDimitry Andric /// 40490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 40500b57cec5SDimitry Andric /// 40510b57cec5SDimitry Andric /// \param __p 40520b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 40530b57cec5SDimitry Andric /// location does not have to be aligned. 40540b57cec5SDimitry Andric /// \param __b 40550b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
40560b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40570b57cec5SDimitry Andric _mm_storeu_si32(void *__p, __m128i __b) 40580b57cec5SDimitry Andric { 40590b57cec5SDimitry Andric struct __storeu_si32 { 40600b57cec5SDimitry Andric int __v; 40610b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40620b57cec5SDimitry Andric ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0]; 40630b57cec5SDimitry Andric } 40640b57cec5SDimitry Andric 40650b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 40660b57cec5SDimitry Andric /// vector. 40670b57cec5SDimitry Andric /// 40680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40690b57cec5SDimitry Andric /// 40700b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 40710b57cec5SDimitry Andric /// 40720b57cec5SDimitry Andric /// \param __p 40730b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 40740b57cec5SDimitry Andric /// location does not have to be aligned. 40750b57cec5SDimitry Andric /// \param __b 40760b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 40770b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 40780b57cec5SDimitry Andric _mm_storeu_si16(void *__p, __m128i __b) 40790b57cec5SDimitry Andric { 40800b57cec5SDimitry Andric struct __storeu_si16 { 40810b57cec5SDimitry Andric short __v; 40820b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40830b57cec5SDimitry Andric ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0]; 40840b57cec5SDimitry Andric } 40850b57cec5SDimitry Andric 40860b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 40870b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 40880b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 
40890b57cec5SDimitry Andric /// 40900b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40910b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 40920b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 40930b57cec5SDimitry Andric /// 40940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40950b57cec5SDimitry Andric /// 40960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 40970b57cec5SDimitry Andric /// instruction. 40980b57cec5SDimitry Andric /// 40990b57cec5SDimitry Andric /// \param __d 41000b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 41010b57cec5SDimitry Andric /// \param __n 41020b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 41030b57cec5SDimitry Andric /// each byte represents the mask bits. 41040b57cec5SDimitry Andric /// \param __p 41050b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 41060b57cec5SDimitry Andric /// values are moved. 41070b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41080b57cec5SDimitry Andric _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) 41090b57cec5SDimitry Andric { 41100b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 41110b57cec5SDimitry Andric } 41120b57cec5SDimitry Andric 41130b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 41140b57cec5SDimitry Andric /// a memory location. 41150b57cec5SDimitry Andric /// 41160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41170b57cec5SDimitry Andric /// 41180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 
41190b57cec5SDimitry Andric /// 41200b57cec5SDimitry Andric /// \param __p 41210b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 41220b57cec5SDimitry Andric /// of the integer vector parameter. 41230b57cec5SDimitry Andric /// \param __a 41240b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 41250b57cec5SDimitry Andric /// value to be stored. 41260b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41270b57cec5SDimitry Andric _mm_storel_epi64(__m128i_u *__p, __m128i __a) 41280b57cec5SDimitry Andric { 41290b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 41300b57cec5SDimitry Andric long long __u; 41310b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 41320b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; 41330b57cec5SDimitry Andric } 41340b57cec5SDimitry Andric 41350b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 41360b57cec5SDimitry Andric /// aligned memory location. 41370b57cec5SDimitry Andric /// 41380b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41390b57cec5SDimitry Andric /// used again soon). 41400b57cec5SDimitry Andric /// 41410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41420b57cec5SDimitry Andric /// 41430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 41440b57cec5SDimitry Andric /// 41450b57cec5SDimitry Andric /// \param __p 41460b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 41470b57cec5SDimitry Andric /// \param __a 41480b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 
41490b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41500b57cec5SDimitry Andric _mm_stream_pd(double *__p, __m128d __a) 41510b57cec5SDimitry Andric { 41520b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p); 41530b57cec5SDimitry Andric } 41540b57cec5SDimitry Andric 41550b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 41560b57cec5SDimitry Andric /// 41570b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41580b57cec5SDimitry Andric /// used again soon). 41590b57cec5SDimitry Andric /// 41600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41610b57cec5SDimitry Andric /// 41620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 41630b57cec5SDimitry Andric /// 41640b57cec5SDimitry Andric /// \param __p 41650b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 41660b57cec5SDimitry Andric /// \param __a 41670b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 41680b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 41690b57cec5SDimitry Andric _mm_stream_si128(__m128i *__p, __m128i __a) 41700b57cec5SDimitry Andric { 41710b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p); 41720b57cec5SDimitry Andric } 41730b57cec5SDimitry Andric 41740b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 41750b57cec5SDimitry Andric /// 41760b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 41770b57cec5SDimitry Andric /// used again soon). 41780b57cec5SDimitry Andric /// 41790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41800b57cec5SDimitry Andric /// 41810b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 
///
/// \param __p
///    A pointer to the 32-bit memory location used to store the value.
/// \param __a
///    A 32-bit integer containing the value to be stored.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
    _mm_stream_si32(int *__p, int __a) {
  __builtin_ia32_movnti(__p, __a);
}

#ifdef __x86_64__
/// Stores a 64-bit integer value in the specified memory location.
///
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.
///
/// \param __p
///    A pointer to the 64-bit memory location used to store the value.
/// \param __a
///    A 64-bit integer containing the value to be stored.
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
    _mm_stream_si64(long long *__p, long long __a) {
  __builtin_ia32_movnti64(__p, __a);
}
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/// The cache line containing \a __p is flushed and invalidated from all
///    caches in the coherency domain.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.
///
/// \param __p
///    A pointer to the memory location used to identify the cache line to be
///    flushed.
void _mm_clflush(void const *__p);

/// Forces strong memory ordering (serialization) between load
///    instructions preceding this instruction and load instructions following
///    this instruction, ensuring the system completes all previous loads before
///    executing subsequent loads.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LFENCE </c> instruction.
///
void _mm_lfence(void);

/// Forces strong memory ordering (serialization) between load and store
///    instructions preceding this instruction and load and store instructions
///    following this instruction, ensuring that the system completes all
///    previous memory accesses before executing subsequent memory accesses.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MFENCE </c> instruction.
///
void _mm_mfence(void);

#if defined(__cplusplus)
} // extern "C"
#endif

/// Converts 16-bit signed integers from both 128-bit integer vector
///    operands into 8-bit signed integers, and packs the results into the
///    destination. Positive values greater than 0x7F are saturated to 0x7F.
///    Negative values less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
///
/// \param __a
///    A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
///    a signed integer and is converted to an 8-bit signed integer with
///    saturation. Values greater than 0x7F are saturated to 0x7F.
Values less 42690b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 42700b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 42710b57cec5SDimitry Andric /// \param __b 42720b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 42730b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 42740b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 42750b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 42760b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 42770b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 42780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 42790b57cec5SDimitry Andric _mm_packs_epi16(__m128i __a, __m128i __b) 42800b57cec5SDimitry Andric { 42810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 42820b57cec5SDimitry Andric } 42830b57cec5SDimitry Andric 42840b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector 42850b57cec5SDimitry Andric /// operands into 16-bit signed integers, and packs the results into the 42860b57cec5SDimitry Andric /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. 42870b57cec5SDimitry Andric /// Negative values less than 0x8000 are saturated to 0x8000. 42880b57cec5SDimitry Andric /// 42890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42900b57cec5SDimitry Andric /// 42910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 42920b57cec5SDimitry Andric /// 42930b57cec5SDimitry Andric /// \param __a 42940b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. 
Each 32-bit element is treated as 42950b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 42960b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 42970b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 42980b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 42990b57cec5SDimitry Andric /// \param __b 43000b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 43010b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 43020b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 43030b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 43040b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 43050b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 43060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 43070b57cec5SDimitry Andric _mm_packs_epi32(__m128i __a, __m128i __b) 43080b57cec5SDimitry Andric { 43090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 43100b57cec5SDimitry Andric } 43110b57cec5SDimitry Andric 43120b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 43130b57cec5SDimitry Andric /// operands into 8-bit unsigned integers, and packs the results into the 43140b57cec5SDimitry Andric /// destination. Values greater than 0xFF are saturated to 0xFF. Values less 43150b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. 43160b57cec5SDimitry Andric /// 43170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43180b57cec5SDimitry Andric /// 43190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 
43200b57cec5SDimitry Andric /// 43210b57cec5SDimitry Andric /// \param __a 43220b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 43230b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 43240b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 43250b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 43260b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 43270b57cec5SDimitry Andric /// \param __b 43280b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 43290b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 43300b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 43310b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 43320b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 43330b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 43340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 43350b57cec5SDimitry Andric _mm_packus_epi16(__m128i __a, __m128i __b) 43360b57cec5SDimitry Andric { 43370b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 43380b57cec5SDimitry Andric } 43390b57cec5SDimitry Andric 43400b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 43410b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 43420b57cec5SDimitry Andric /// 43430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43440b57cec5SDimitry Andric /// 43450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction. 
///
/// \param __a
///    A 128-bit integer vector.
/// \param __imm
///    An immediate value. Bits [2:0] select values from \a __a to be assigned
///    to bits[15:0] of the result. \n
///    000: assign values from bits [15:0] of \a __a. \n
///    001: assign values from bits [31:16] of \a __a. \n
///    010: assign values from bits [47:32] of \a __a. \n
///    011: assign values from bits [63:48] of \a __a. \n
///    100: assign values from bits [79:64] of \a __a. \n
///    101: assign values from bits [95:80] of \a __a. \n
///    110: assign values from bits [111:96] of \a __a. \n
///    111: assign values from bits [127:112] of \a __a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm)                                              \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a),      \
                                                    (int)(imm)))

/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then inserting the lower 16 bits
///    of an integer parameter into an offset specified by the immediate-value
///    parameter.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a __b.
/// \param __b
///    An integer. The lower 16 bits of this parameter are written to the
///    result beginning at an offset specified by \a __imm.
/// \param __imm
///    An immediate value specifying the bit offset in the result at which the
///    lower 16 bits of \a __b are written.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi16(a, b, imm)                                            \
  ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b),        \
                                        (int)(imm)))

/// Copies the values of the most significant bits from each 8-bit
///    element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
///    value, zero-extends the value, and writes it to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.
43970b57cec5SDimitry Andric /// 43980b57cec5SDimitry Andric /// \param __a 43990b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 44000b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 44010b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 44020b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 44030b57cec5SDimitry Andric _mm_movemask_epi8(__m128i __a) 44040b57cec5SDimitry Andric { 44050b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 44060b57cec5SDimitry Andric } 44070b57cec5SDimitry Andric 44080b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 44090b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 44100b57cec5SDimitry Andric /// parameter as a specifier. 44110b57cec5SDimitry Andric /// 44120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44130b57cec5SDimitry Andric /// 44140b57cec5SDimitry Andric /// \code 44150b57cec5SDimitry Andric /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); 44160b57cec5SDimitry Andric /// \endcode 44170b57cec5SDimitry Andric /// 44180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. 44190b57cec5SDimitry Andric /// 44200b57cec5SDimitry Andric /// \param a 44210b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 44220b57cec5SDimitry Andric /// \param imm 44230b57cec5SDimitry Andric /// An immediate value containing an 8-bit value specifying which elements to 44240b57cec5SDimitry Andric /// copy from a. The destinations within the 128-bit destination are assigned 44250b57cec5SDimitry Andric /// values as follows: \n 44260b57cec5SDimitry Andric /// Bits [1:0] are used to assign values to bits [31:0] of the result. 
\n 44270b57cec5SDimitry Andric /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n 44280b57cec5SDimitry Andric /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n 44290b57cec5SDimitry Andric /// Bits [7:6] are used to assign values to bits [127:96] of the result. \n 44300b57cec5SDimitry Andric /// Bit value assignments: \n 44310b57cec5SDimitry Andric /// 00: assign values from bits [31:0] of \a a. \n 44320b57cec5SDimitry Andric /// 01: assign values from bits [63:32] of \a a. \n 44330b57cec5SDimitry Andric /// 10: assign values from bits [95:64] of \a a. \n 44340b57cec5SDimitry Andric /// 11: assign values from bits [127:96] of \a a. 44350b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 44360b57cec5SDimitry Andric #define _mm_shuffle_epi32(a, imm) \ 4437*349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) 44380b57cec5SDimitry Andric 44390b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four lower 16-bit 44400b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 44410b57cec5SDimitry Andric /// value parameter as a specifier. 44420b57cec5SDimitry Andric /// 44430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44440b57cec5SDimitry Andric /// 44450b57cec5SDimitry Andric /// \code 44460b57cec5SDimitry Andric /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); 44470b57cec5SDimitry Andric /// \endcode 44480b57cec5SDimitry Andric /// 44490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. 44500b57cec5SDimitry Andric /// 44510b57cec5SDimitry Andric /// \param a 44520b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits 44530b57cec5SDimitry Andric /// [127:64] of the result. 
44540b57cec5SDimitry Andric /// \param imm 44550b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 44560b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n 44570b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n 44580b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n 44590b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n 44600b57cec5SDimitry Andric /// Bit value assignments: \n 44610b57cec5SDimitry Andric /// 00: assign values from bits [15:0] of \a a. \n 44620b57cec5SDimitry Andric /// 01: assign values from bits [31:16] of \a a. \n 44630b57cec5SDimitry Andric /// 10: assign values from bits [47:32] of \a a. \n 44640b57cec5SDimitry Andric /// 11: assign values from bits [63:48] of \a a. \n 44650b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 44660b57cec5SDimitry Andric #define _mm_shufflelo_epi16(a, imm) \ 4467*349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) 44680b57cec5SDimitry Andric 44690b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four upper 16-bit 44700b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 44710b57cec5SDimitry Andric /// value parameter as a specifier. 44720b57cec5SDimitry Andric /// 44730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44740b57cec5SDimitry Andric /// 44750b57cec5SDimitry Andric /// \code 44760b57cec5SDimitry Andric /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); 44770b57cec5SDimitry Andric /// \endcode 44780b57cec5SDimitry Andric /// 44790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction. 
44800b57cec5SDimitry Andric /// 44810b57cec5SDimitry Andric /// \param a 44820b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits 44830b57cec5SDimitry Andric /// [63:0] of the result. 44840b57cec5SDimitry Andric /// \param imm 44850b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 44860b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n 44870b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [95:80] of the result. \n 44880b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n 44890b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [127:112] of the result. \n 44900b57cec5SDimitry Andric /// Bit value assignments: \n 44910b57cec5SDimitry Andric /// 00: assign values from bits [79:64] of \a a. \n 44920b57cec5SDimitry Andric /// 01: assign values from bits [95:80] of \a a. \n 44930b57cec5SDimitry Andric /// 10: assign values from bits [111:96] of \a a. \n 44940b57cec5SDimitry Andric /// 11: assign values from bits [127:112] of \a a. \n 44950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 44960b57cec5SDimitry Andric #define _mm_shufflehi_epi16(a, imm) \ 4497*349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) 44980b57cec5SDimitry Andric 44990b57cec5SDimitry Andric /// Unpacks the high-order (index 8-15) values from two 128-bit vectors 45000b57cec5SDimitry Andric /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 45010b57cec5SDimitry Andric /// 45020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45030b57cec5SDimitry Andric /// 45040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> 45050b57cec5SDimitry Andric /// instruction. 
45060b57cec5SDimitry Andric /// 45070b57cec5SDimitry Andric /// \param __a 45080b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 45090b57cec5SDimitry Andric /// Bits [71:64] are written to bits [7:0] of the result. \n 45100b57cec5SDimitry Andric /// Bits [79:72] are written to bits [23:16] of the result. \n 45110b57cec5SDimitry Andric /// Bits [87:80] are written to bits [39:32] of the result. \n 45120b57cec5SDimitry Andric /// Bits [95:88] are written to bits [55:48] of the result. \n 45130b57cec5SDimitry Andric /// Bits [103:96] are written to bits [71:64] of the result. \n 45140b57cec5SDimitry Andric /// Bits [111:104] are written to bits [87:80] of the result. \n 45150b57cec5SDimitry Andric /// Bits [119:112] are written to bits [103:96] of the result. \n 45160b57cec5SDimitry Andric /// Bits [127:120] are written to bits [119:112] of the result. 45170b57cec5SDimitry Andric /// \param __b 45180b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 45190b57cec5SDimitry Andric /// Bits [71:64] are written to bits [15:8] of the result. \n 45200b57cec5SDimitry Andric /// Bits [79:72] are written to bits [31:24] of the result. \n 45210b57cec5SDimitry Andric /// Bits [87:80] are written to bits [47:40] of the result. \n 45220b57cec5SDimitry Andric /// Bits [95:88] are written to bits [63:56] of the result. \n 45230b57cec5SDimitry Andric /// Bits [103:96] are written to bits [79:72] of the result. \n 45240b57cec5SDimitry Andric /// Bits [111:104] are written to bits [95:88] of the result. \n 45250b57cec5SDimitry Andric /// Bits [119:112] are written to bits [111:104] of the result. \n 45260b57cec5SDimitry Andric /// Bits [127:120] are written to bits [127:120] of the result. 45270b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 
45280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45290b57cec5SDimitry Andric _mm_unpackhi_epi8(__m128i __a, __m128i __b) 45300b57cec5SDimitry Andric { 45310b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 45320b57cec5SDimitry Andric } 45330b57cec5SDimitry Andric 45340b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 45350b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 45360b57cec5SDimitry Andric /// 45370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45380b57cec5SDimitry Andric /// 45390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 45400b57cec5SDimitry Andric /// instruction. 45410b57cec5SDimitry Andric /// 45420b57cec5SDimitry Andric /// \param __a 45430b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45440b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 45450b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 45460b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 45470b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 45480b57cec5SDimitry Andric /// \param __b 45490b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45500b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 45510b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 45520b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 45530b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 45540b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 
45550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45560b57cec5SDimitry Andric _mm_unpackhi_epi16(__m128i __a, __m128i __b) 45570b57cec5SDimitry Andric { 45580b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); 45590b57cec5SDimitry Andric } 45600b57cec5SDimitry Andric 45610b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 45620b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 45630b57cec5SDimitry Andric /// 45640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45650b57cec5SDimitry Andric /// 45660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 45670b57cec5SDimitry Andric /// instruction. 45680b57cec5SDimitry Andric /// 45690b57cec5SDimitry Andric /// \param __a 45700b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45710b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 45720b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 45730b57cec5SDimitry Andric /// \param __b 45740b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45750b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 45760b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 45770b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
45780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 45790b57cec5SDimitry Andric _mm_unpackhi_epi32(__m128i __a, __m128i __b) 45800b57cec5SDimitry Andric { 45810b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); 45820b57cec5SDimitry Andric } 45830b57cec5SDimitry Andric 45840b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 45850b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 45860b57cec5SDimitry Andric /// 45870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45880b57cec5SDimitry Andric /// 45890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 45900b57cec5SDimitry Andric /// instruction. 45910b57cec5SDimitry Andric /// 45920b57cec5SDimitry Andric /// \param __a 45930b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45940b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 45950b57cec5SDimitry Andric /// \param __b 45960b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45970b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 45980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 45990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46000b57cec5SDimitry Andric _mm_unpackhi_epi64(__m128i __a, __m128i __b) 46010b57cec5SDimitry Andric { 46020b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1); 46030b57cec5SDimitry Andric } 46040b57cec5SDimitry Andric 46050b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 46060b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 
46070b57cec5SDimitry Andric /// 46080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46090b57cec5SDimitry Andric /// 46100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> 46110b57cec5SDimitry Andric /// instruction. 46120b57cec5SDimitry Andric /// 46130b57cec5SDimitry Andric /// \param __a 46140b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 46150b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 46160b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 46170b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. \n 46180b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. \n 46190b57cec5SDimitry Andric /// Bits [39:32] are written to bits [71:64] of the result. \n 46200b57cec5SDimitry Andric /// Bits [47:40] are written to bits [87:80] of the result. \n 46210b57cec5SDimitry Andric /// Bits [55:48] are written to bits [103:96] of the result. \n 46220b57cec5SDimitry Andric /// Bits [63:56] are written to bits [119:112] of the result. 46230b57cec5SDimitry Andric /// \param __b 46240b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 46250b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 46260b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 46270b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 46280b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. \n 46290b57cec5SDimitry Andric /// Bits [39:32] are written to bits [79:72] of the result. \n 46300b57cec5SDimitry Andric /// Bits [47:40] are written to bits [95:88] of the result. \n 46310b57cec5SDimitry Andric /// Bits [55:48] are written to bits [111:104] of the result. \n 46320b57cec5SDimitry Andric /// Bits [63:56] are written to bits [127:120] of the result. 
46330b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 46340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46350b57cec5SDimitry Andric _mm_unpacklo_epi8(__m128i __a, __m128i __b) 46360b57cec5SDimitry Andric { 46370b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); 46380b57cec5SDimitry Andric } 46390b57cec5SDimitry Andric 46400b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 46410b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 46420b57cec5SDimitry Andric /// [8 x i16]. 46430b57cec5SDimitry Andric /// 46440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46450b57cec5SDimitry Andric /// 46460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 46470b57cec5SDimitry Andric /// instruction. 46480b57cec5SDimitry Andric /// 46490b57cec5SDimitry Andric /// \param __a 46500b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 46510b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 46520b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 46530b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 46540b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 46550b57cec5SDimitry Andric /// \param __b 46560b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 46570b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 46580b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 46590b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. \n 46600b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 
46610b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 46620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46630b57cec5SDimitry Andric _mm_unpacklo_epi16(__m128i __a, __m128i __b) 46640b57cec5SDimitry Andric { 46650b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); 46660b57cec5SDimitry Andric } 46670b57cec5SDimitry Andric 46680b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 46690b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 46700b57cec5SDimitry Andric /// 46710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46720b57cec5SDimitry Andric /// 46730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 46740b57cec5SDimitry Andric /// instruction. 46750b57cec5SDimitry Andric /// 46760b57cec5SDimitry Andric /// \param __a 46770b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 46780b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 46790b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 46800b57cec5SDimitry Andric /// \param __b 46810b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 46820b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 46830b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 46840b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
46850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 46860b57cec5SDimitry Andric _mm_unpacklo_epi32(__m128i __a, __m128i __b) 46870b57cec5SDimitry Andric { 46880b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); 46890b57cec5SDimitry Andric } 46900b57cec5SDimitry Andric 46910b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 46920b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 46930b57cec5SDimitry Andric /// 46940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46950b57cec5SDimitry Andric /// 46960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 46970b57cec5SDimitry Andric /// instruction. 46980b57cec5SDimitry Andric /// 46990b57cec5SDimitry Andric /// \param __a 47000b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 47010b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 47020b57cec5SDimitry Andric /// \param __b 47030b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 47040b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 47050b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 47060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47070b57cec5SDimitry Andric _mm_unpacklo_epi64(__m128i __a, __m128i __b) 47080b57cec5SDimitry Andric { 47090b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0); 47100b57cec5SDimitry Andric } 47110b57cec5SDimitry Andric 47120b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 47130b57cec5SDimitry Andric /// integer. 
47140b57cec5SDimitry Andric /// 47150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47160b57cec5SDimitry Andric /// 47170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 47180b57cec5SDimitry Andric /// 47190b57cec5SDimitry Andric /// \param __a 47200b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 47210b57cec5SDimitry Andric /// destination. 47220b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 47230b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 47240b57cec5SDimitry Andric _mm_movepi64_pi64(__m128i __a) 47250b57cec5SDimitry Andric { 47260b57cec5SDimitry Andric return (__m64)__a[0]; 47270b57cec5SDimitry Andric } 47280b57cec5SDimitry Andric 47290b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 47300b57cec5SDimitry Andric /// upper bits. 47310b57cec5SDimitry Andric /// 47320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47330b57cec5SDimitry Andric /// 47340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 47350b57cec5SDimitry Andric /// 47360b57cec5SDimitry Andric /// \param __a 47370b57cec5SDimitry Andric /// A 64-bit value. 47380b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 47390b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 47400b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47410b57cec5SDimitry Andric _mm_movpi64_epi64(__m64 __a) 47420b57cec5SDimitry Andric { 47430b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ (long long)__a, 0 }; 47440b57cec5SDimitry Andric } 47450b57cec5SDimitry Andric 47460b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 47470b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 
47480b57cec5SDimitry Andric /// 47490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47500b57cec5SDimitry Andric /// 47510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 47520b57cec5SDimitry Andric /// 47530b57cec5SDimitry Andric /// \param __a 47540b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 47550b57cec5SDimitry Andric /// destination. 47560b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 47570b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 47580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 47590b57cec5SDimitry Andric _mm_move_epi64(__m128i __a) 47600b57cec5SDimitry Andric { 47610b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 47620b57cec5SDimitry Andric } 47630b57cec5SDimitry Andric 47640b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 47650b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 47660b57cec5SDimitry Andric /// double]. 47670b57cec5SDimitry Andric /// 47680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47690b57cec5SDimitry Andric /// 47700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 47710b57cec5SDimitry Andric /// 47720b57cec5SDimitry Andric /// \param __a 47730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47740b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 47750b57cec5SDimitry Andric /// \param __b 47760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47770b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 47780b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
47790b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 47800b57cec5SDimitry Andric _mm_unpackhi_pd(__m128d __a, __m128d __b) 47810b57cec5SDimitry Andric { 47820b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1); 47830b57cec5SDimitry Andric } 47840b57cec5SDimitry Andric 47850b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 47860b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 47870b57cec5SDimitry Andric /// double]. 47880b57cec5SDimitry Andric /// 47890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47900b57cec5SDimitry Andric /// 47910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 47920b57cec5SDimitry Andric /// 47930b57cec5SDimitry Andric /// \param __a 47940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47950b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 47960b57cec5SDimitry Andric /// \param __b 47970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 47980b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 47990b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 48000b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 48010b57cec5SDimitry Andric _mm_unpacklo_pd(__m128d __a, __m128d __b) 48020b57cec5SDimitry Andric { 48030b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0); 48040b57cec5SDimitry Andric } 48050b57cec5SDimitry Andric 48060b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 48070b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 48080b57cec5SDimitry Andric /// low-order bits of the destination. 
48090b57cec5SDimitry Andric /// 48100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48110b57cec5SDimitry Andric /// 48120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 48130b57cec5SDimitry Andric /// 48140b57cec5SDimitry Andric /// \param __a 48150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 48160b57cec5SDimitry Andric /// be extracted. 48170b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 48180b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 48190b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 48200b57cec5SDimitry Andric _mm_movemask_pd(__m128d __a) 48210b57cec5SDimitry Andric { 48220b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 48230b57cec5SDimitry Andric } 48240b57cec5SDimitry Andric 48250b57cec5SDimitry Andric 48260b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 48270b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 48280b57cec5SDimitry Andric /// parameter as a specifier. 48290b57cec5SDimitry Andric /// 48300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48310b57cec5SDimitry Andric /// 48320b57cec5SDimitry Andric /// \code 48330b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 48340b57cec5SDimitry Andric /// \endcode 48350b57cec5SDimitry Andric /// 48360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 48370b57cec5SDimitry Andric /// 48380b57cec5SDimitry Andric /// \param a 48390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 48400b57cec5SDimitry Andric /// \param b 48410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
/// \param i
///    An 8-bit immediate value. The least significant two bits specify which
///    elements to copy from \a a and \a b: \n
///    Bit[0] = 0: lower element of \a a copied to lower element of result. \n
///    Bit[0] = 1: upper element of \a a copied to lower element of result. \n
///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
///    Bit[1] = 1: upper element of \a b copied to upper element of result.
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
  ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), \
                                  (__v2df)(__m128d)(b), \
                                  (int)(i)))

/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
///    floating-point vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit floating-point vector of [2 x double].
/// \returns A 128-bit floating-point vector of [4 x float] containing the same
///    bitwise pattern as the parameter.
48650b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 48660b57cec5SDimitry Andric _mm_castpd_ps(__m128d __a) 48670b57cec5SDimitry Andric { 48680b57cec5SDimitry Andric return (__m128)__a; 48690b57cec5SDimitry Andric } 48700b57cec5SDimitry Andric 48710b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 48720b57cec5SDimitry Andric /// integer vector. 48730b57cec5SDimitry Andric /// 48740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48750b57cec5SDimitry Andric /// 48760b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 48770b57cec5SDimitry Andric /// 48780b57cec5SDimitry Andric /// \param __a 48790b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 48800b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 48810b57cec5SDimitry Andric /// parameter. 48820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 48830b57cec5SDimitry Andric _mm_castpd_si128(__m128d __a) 48840b57cec5SDimitry Andric { 48850b57cec5SDimitry Andric return (__m128i)__a; 48860b57cec5SDimitry Andric } 48870b57cec5SDimitry Andric 48880b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 48890b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 48900b57cec5SDimitry Andric /// 48910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 48920b57cec5SDimitry Andric /// 48930b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 48940b57cec5SDimitry Andric /// 48950b57cec5SDimitry Andric /// \param __a 48960b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 48970b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 48980b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
48990b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 49000b57cec5SDimitry Andric _mm_castps_pd(__m128 __a) 49010b57cec5SDimitry Andric { 49020b57cec5SDimitry Andric return (__m128d)__a; 49030b57cec5SDimitry Andric } 49040b57cec5SDimitry Andric 49050b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 49060b57cec5SDimitry Andric /// integer vector. 49070b57cec5SDimitry Andric /// 49080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49090b57cec5SDimitry Andric /// 49100b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49110b57cec5SDimitry Andric /// 49120b57cec5SDimitry Andric /// \param __a 49130b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 49140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 49150b57cec5SDimitry Andric /// parameter. 49160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 49170b57cec5SDimitry Andric _mm_castps_si128(__m128 __a) 49180b57cec5SDimitry Andric { 49190b57cec5SDimitry Andric return (__m128i)__a; 49200b57cec5SDimitry Andric } 49210b57cec5SDimitry Andric 49220b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 49230b57cec5SDimitry Andric /// of [4 x float]. 49240b57cec5SDimitry Andric /// 49250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49260b57cec5SDimitry Andric /// 49270b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49280b57cec5SDimitry Andric /// 49290b57cec5SDimitry Andric /// \param __a 49300b57cec5SDimitry Andric /// A 128-bit integer vector. 49310b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 49320b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
49330b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 49340b57cec5SDimitry Andric _mm_castsi128_ps(__m128i __a) 49350b57cec5SDimitry Andric { 49360b57cec5SDimitry Andric return (__m128)__a; 49370b57cec5SDimitry Andric } 49380b57cec5SDimitry Andric 49390b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 49400b57cec5SDimitry Andric /// of [2 x double]. 49410b57cec5SDimitry Andric /// 49420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49430b57cec5SDimitry Andric /// 49440b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 49450b57cec5SDimitry Andric /// 49460b57cec5SDimitry Andric /// \param __a 49470b57cec5SDimitry Andric /// A 128-bit integer vector. 49480b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 49490b57cec5SDimitry Andric /// bitwise pattern as the parameter. 49500b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 49510b57cec5SDimitry Andric _mm_castsi128_pd(__m128i __a) 49520b57cec5SDimitry Andric { 49530b57cec5SDimitry Andric return (__m128d)__a; 49540b57cec5SDimitry Andric } 49550b57cec5SDimitry Andric 49560b57cec5SDimitry Andric #if defined(__cplusplus) 49570b57cec5SDimitry Andric extern "C" { 49580b57cec5SDimitry Andric #endif 49590b57cec5SDimitry Andric 49600b57cec5SDimitry Andric /// Indicates that a spin loop is being executed for the purposes of 49610b57cec5SDimitry Andric /// optimizing power consumption during the loop. 49620b57cec5SDimitry Andric /// 49630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 49640b57cec5SDimitry Andric /// 49650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAUSE </c> instruction. 
49660b57cec5SDimitry Andric /// 49670b57cec5SDimitry Andric void _mm_pause(void); 49680b57cec5SDimitry Andric 49690b57cec5SDimitry Andric #if defined(__cplusplus) 49700b57cec5SDimitry Andric } // extern "C" 49710b57cec5SDimitry Andric #endif 49720b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 49730b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 49740b57cec5SDimitry Andric 49750b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 49760b57cec5SDimitry Andric 49775ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_ON (0x0040U) 49785ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_OFF (0x0000U) 49790b57cec5SDimitry Andric 49805ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_MASK (0x0040U) 49810b57cec5SDimitry Andric 49820b57cec5SDimitry Andric #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 49830b57cec5SDimitry Andric #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 49840b57cec5SDimitry Andric 49850b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */ 4986