10b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 110b57cec5SDimitry Andric #define __EMMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <xmmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 200b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 2381ad6265SDimitry Andric typedef long long __m128i_u 2481ad6265SDimitry Andric __attribute__((__vector_size__(16), __aligned__(1))); 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric /* Type defines. 
*/ 270b57cec5SDimitry Andric typedef double __v2df __attribute__((__vector_size__(16))); 280b57cec5SDimitry Andric typedef long long __v2di __attribute__((__vector_size__(16))); 290b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 300b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric /* Unsigned types */ 330b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__((__vector_size__(16))); 340b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 350b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 380b57cec5SDimitry Andric * appear in the interface though. */ 390b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 400b57cec5SDimitry Andric 41*bdd1243dSDimitry Andric #ifdef __SSE2__ 42*bdd1243dSDimitry Andric /* Both _Float16 and __bf16 require SSE2 being enabled. */ 43*bdd1243dSDimitry Andric typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); 44*bdd1243dSDimitry Andric typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); 45*bdd1243dSDimitry Andric typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); 46*bdd1243dSDimitry Andric 47*bdd1243dSDimitry Andric typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); 48*bdd1243dSDimitry Andric typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); 49*bdd1243dSDimitry Andric #endif 50*bdd1243dSDimitry Andric 510b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. 
*/
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \
                 __min_vector_width__(64)))

/// Adds lower double-precision values in both operands and returns the
///    sum in the lower 64 bits of the result. The upper 64 bits of the result
///    are copied from the upper double-precision value of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
///    sum of the lower 64 bits of both operands. The upper 64 bits are copied
///    from the upper 64 bits of the first source operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a,
                                                        __m128d __b) {
  /* Only element 0 is modified; element 1 of __a is returned untouched. */
  __a[0] += __b[0];
  return __a;
}

/// Adds two 128-bit vectors of [2 x double].
810b57cec5SDimitry Andric /// 820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 830b57cec5SDimitry Andric /// 840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 850b57cec5SDimitry Andric /// 860b57cec5SDimitry Andric /// \param __a 870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 880b57cec5SDimitry Andric /// \param __b 890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 910b57cec5SDimitry Andric /// operands. 9281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, 9381ad6265SDimitry Andric __m128d __b) { 940b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 980b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 990b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 1000b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 1010b57cec5SDimitry Andric /// operand. 1020b57cec5SDimitry Andric /// 1030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1040b57cec5SDimitry Andric /// 1050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 1060b57cec5SDimitry Andric /// 1070b57cec5SDimitry Andric /// \param __a 1080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1090b57cec5SDimitry Andric /// \param __b 1100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 
1110b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1120b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 1130b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 11481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, 11581ad6265SDimitry Andric __m128d __b) { 1160b57cec5SDimitry Andric __a[0] -= __b[0]; 1170b57cec5SDimitry Andric return __a; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 1210b57cec5SDimitry Andric /// 1220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1230b57cec5SDimitry Andric /// 1240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 1250b57cec5SDimitry Andric /// 1260b57cec5SDimitry Andric /// \param __a 1270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1280b57cec5SDimitry Andric /// \param __b 1290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1300b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 1310b57cec5SDimitry Andric /// both operands. 13281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, 13381ad6265SDimitry Andric __m128d __b) { 1340b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 1380b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 1390b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1400b57cec5SDimitry Andric /// operand. 
1410b57cec5SDimitry Andric /// 1420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1430b57cec5SDimitry Andric /// 1440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 1450b57cec5SDimitry Andric /// 1460b57cec5SDimitry Andric /// \param __a 1470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1480b57cec5SDimitry Andric /// \param __b 1490b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1500b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1510b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 1520b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 15381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, 15481ad6265SDimitry Andric __m128d __b) { 1550b57cec5SDimitry Andric __a[0] *= __b[0]; 1560b57cec5SDimitry Andric return __a; 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 1600b57cec5SDimitry Andric /// 1610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1620b57cec5SDimitry Andric /// 1630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 1640b57cec5SDimitry Andric /// 1650b57cec5SDimitry Andric /// \param __a 1660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1670b57cec5SDimitry Andric /// \param __b 1680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1690b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 1700b57cec5SDimitry Andric /// operands. 
17181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, 17281ad6265SDimitry Andric __m128d __b) { 1730b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 1740b57cec5SDimitry Andric } 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 1770b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 1780b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. The upper 64 bits of the 1790b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1800b57cec5SDimitry Andric /// operand. 1810b57cec5SDimitry Andric /// 1820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1830b57cec5SDimitry Andric /// 1840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 1850b57cec5SDimitry Andric /// 1860b57cec5SDimitry Andric /// \param __a 1870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1880b57cec5SDimitry Andric /// \param __b 1890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 1900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1910b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 1920b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 19381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, 19481ad6265SDimitry Andric __m128d __b) { 1950b57cec5SDimitry Andric __a[0] /= __b[0]; 1960b57cec5SDimitry Andric return __a; 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 2000b57cec5SDimitry Andric /// [2 x double]. 
2010b57cec5SDimitry Andric /// 2020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2030b57cec5SDimitry Andric /// 2040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 2050b57cec5SDimitry Andric /// 2060b57cec5SDimitry Andric /// \param __a 2070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 2080b57cec5SDimitry Andric /// \param __b 2090b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 2100b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 2110b57cec5SDimitry Andric /// operands. 21281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, 21381ad6265SDimitry Andric __m128d __b) { 2140b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 2180b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 2190b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 2200b57cec5SDimitry Andric /// double-precision value of the first operand. 2210b57cec5SDimitry Andric /// 2220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2230b57cec5SDimitry Andric /// 2240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 2250b57cec5SDimitry Andric /// 2260b57cec5SDimitry Andric /// \param __a 2270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2280b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 2290b57cec5SDimitry Andric /// result. 2300b57cec5SDimitry Andric /// \param __b 2310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 
The 2320b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 2330b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2340b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 2350b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 23681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, 23781ad6265SDimitry Andric __m128d __b) { 2380b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 2390b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 2430b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2440b57cec5SDimitry Andric /// 2450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2460b57cec5SDimitry Andric /// 2470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 2480b57cec5SDimitry Andric /// 2490b57cec5SDimitry Andric /// \param __a 2500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 2510b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 2520b57cec5SDimitry Andric /// values in the operand. 25381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { 2540b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2580b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 2590b57cec5SDimitry Andric /// result. 
The upper 64 bits of the result are copied from the upper 2600b57cec5SDimitry Andric /// double-precision value of the first operand. 2610b57cec5SDimitry Andric /// 2620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2630b57cec5SDimitry Andric /// 2640b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 2650b57cec5SDimitry Andric /// 2660b57cec5SDimitry Andric /// \param __a 2670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2680b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2690b57cec5SDimitry Andric /// \param __b 2700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2710b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2720b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2730b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 2740b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 27581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, 27681ad6265SDimitry Andric __m128d __b) { 2770b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 2810b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the lesser of each pair of 2820b57cec5SDimitry Andric /// values. 2830b57cec5SDimitry Andric /// 2840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2850b57cec5SDimitry Andric /// 2860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 
2870b57cec5SDimitry Andric /// 2880b57cec5SDimitry Andric /// \param __a 2890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2900b57cec5SDimitry Andric /// \param __b 2910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2920b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 2930b57cec5SDimitry Andric /// between both operands. 29481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, 29581ad6265SDimitry Andric __m128d __b) { 2960b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 3000b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 3010b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 3020b57cec5SDimitry Andric /// double-precision value of the first operand. 3030b57cec5SDimitry Andric /// 3040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3050b57cec5SDimitry Andric /// 3060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 3070b57cec5SDimitry Andric /// 3080b57cec5SDimitry Andric /// \param __a 3090b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3100b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3110b57cec5SDimitry Andric /// \param __b 3120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3130b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3140b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3150b57cec5SDimitry Andric /// maximum value between both operands. 
The upper 64 bits are copied from 3160b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 31781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, 31881ad6265SDimitry Andric __m128d __b) { 3190b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 3230b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the greater of each pair 3240b57cec5SDimitry Andric /// of values. 3250b57cec5SDimitry Andric /// 3260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3270b57cec5SDimitry Andric /// 3280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 3290b57cec5SDimitry Andric /// 3300b57cec5SDimitry Andric /// \param __a 3310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3320b57cec5SDimitry Andric /// \param __b 3330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3340b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 3350b57cec5SDimitry Andric /// between both operands. 33681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, 33781ad6265SDimitry Andric __m128d __b) { 3380b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 3390b57cec5SDimitry Andric } 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 3420b57cec5SDimitry Andric /// 3430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3440b57cec5SDimitry Andric /// 3450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
3460b57cec5SDimitry Andric /// 3470b57cec5SDimitry Andric /// \param __a 3480b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3490b57cec5SDimitry Andric /// \param __b 3500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3510b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3520b57cec5SDimitry Andric /// values between both operands. 35381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, 35481ad6265SDimitry Andric __m128d __b) { 3550b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 3590b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 3600b57cec5SDimitry Andric /// 3610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3620b57cec5SDimitry Andric /// 3630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 3640b57cec5SDimitry Andric /// 3650b57cec5SDimitry Andric /// \param __a 3660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. The 3670b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 3680b57cec5SDimitry Andric /// \param __b 3690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 3700b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3710b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 3720b57cec5SDimitry Andric /// operand. 
37381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, 37481ad6265SDimitry Andric __m128d __b) { 3750b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 3790b57cec5SDimitry Andric /// 3800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3810b57cec5SDimitry Andric /// 3820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 3830b57cec5SDimitry Andric /// 3840b57cec5SDimitry Andric /// \param __a 3850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3860b57cec5SDimitry Andric /// \param __b 3870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3880b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 3890b57cec5SDimitry Andric /// values between both operands. 39081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, 39181ad6265SDimitry Andric __m128d __b) { 3920b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 3960b57cec5SDimitry Andric /// 3970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3980b57cec5SDimitry Andric /// 3990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 4000b57cec5SDimitry Andric /// 4010b57cec5SDimitry Andric /// \param __a 4020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 4030b57cec5SDimitry Andric /// \param __b 4040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
4050b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 4060b57cec5SDimitry Andric /// values between both operands. 40781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, 40881ad6265SDimitry Andric __m128d __b) { 4090b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4130b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 4140b57cec5SDimitry Andric /// for false, 0xFFFFFFFFFFFFFFFF for true. 4150b57cec5SDimitry Andric /// 4160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4170b57cec5SDimitry Andric /// 4180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 4190b57cec5SDimitry Andric /// 4200b57cec5SDimitry Andric /// \param __a 4210b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4220b57cec5SDimitry Andric /// \param __b 4230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4240b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 42581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, 42681ad6265SDimitry Andric __m128d __b) { 4270b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4310b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4320b57cec5SDimitry Andric /// operand are less than those in the second operand. Each comparison 4330b57cec5SDimitry Andric /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4340b57cec5SDimitry Andric /// 4350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4360b57cec5SDimitry Andric /// 4370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4380b57cec5SDimitry Andric /// 4390b57cec5SDimitry Andric /// \param __a 4400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4410b57cec5SDimitry Andric /// \param __b 4420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4430b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 44481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, 44581ad6265SDimitry Andric __m128d __b) { 4460b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4500b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4510b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 4520b57cec5SDimitry Andric /// 4530b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4540b57cec5SDimitry Andric /// 4550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4560b57cec5SDimitry Andric /// 4570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4580b57cec5SDimitry Andric /// 4590b57cec5SDimitry Andric /// \param __a 4600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4610b57cec5SDimitry Andric /// \param __b 4620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4630b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
46481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, 46581ad6265SDimitry Andric __m128d __b) { 4660b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4700b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4710b57cec5SDimitry Andric /// operand are greater than those in the second operand. 4720b57cec5SDimitry Andric /// 4730b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4740b57cec5SDimitry Andric /// 4750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4760b57cec5SDimitry Andric /// 4770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4780b57cec5SDimitry Andric /// 4790b57cec5SDimitry Andric /// \param __a 4800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4810b57cec5SDimitry Andric /// \param __b 4820b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4830b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 48481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, 48581ad6265SDimitry Andric __m128d __b) { 4860b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 4870b57cec5SDimitry Andric } 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4900b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4910b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 4920b57cec5SDimitry Andric /// 4930b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4940b57cec5SDimitry Andric /// 4950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4960b57cec5SDimitry Andric /// 4970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4980b57cec5SDimitry Andric /// 4990b57cec5SDimitry Andric /// \param __a 5000b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5010b57cec5SDimitry Andric /// \param __b 5020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5030b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 50481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, 50581ad6265SDimitry Andric __m128d __b) { 5060b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5100b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5110b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 5120b57cec5SDimitry Andric /// 5130b57cec5SDimitry Andric /// A pair of double-precision values are "ordered" with respect to each 5140b57cec5SDimitry Andric /// other if neither value is a NaN. Each comparison yields 0x0 for false, 5150b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 5160b57cec5SDimitry Andric /// 5170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5180b57cec5SDimitry Andric /// 5190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 5200b57cec5SDimitry Andric /// 5210b57cec5SDimitry Andric /// \param __a 5220b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5230b57cec5SDimitry Andric /// \param __b 5240b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
5250b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 52681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, 52781ad6265SDimitry Andric __m128d __b) { 5280b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5320b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5330b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 5340b57cec5SDimitry Andric /// 5350b57cec5SDimitry Andric /// A pair of double-precision values are "unordered" with respect to each 5360b57cec5SDimitry Andric /// other if one or both values are NaN. Each comparison yields 0x0 for 5370b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 5380b57cec5SDimitry Andric /// 5390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5400b57cec5SDimitry Andric /// 5410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 5420b57cec5SDimitry Andric /// instruction. 5430b57cec5SDimitry Andric /// 5440b57cec5SDimitry Andric /// \param __a 5450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5460b57cec5SDimitry Andric /// \param __b 5470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5480b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
54981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, 55081ad6265SDimitry Andric __m128d __b) { 5510b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5550b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5560b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 5570b57cec5SDimitry Andric /// 5580b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5590b57cec5SDimitry Andric /// 5600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5610b57cec5SDimitry Andric /// 5620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 5630b57cec5SDimitry Andric /// 5640b57cec5SDimitry Andric /// \param __a 5650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5660b57cec5SDimitry Andric /// \param __b 5670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5680b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 56981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, 57081ad6265SDimitry Andric __m128d __b) { 5710b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 5720b57cec5SDimitry Andric } 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5750b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5760b57cec5SDimitry Andric /// operand are not less than those in the second operand. 5770b57cec5SDimitry Andric /// 5780b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
5790b57cec5SDimitry Andric /// 5800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5810b57cec5SDimitry Andric /// 5820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 5830b57cec5SDimitry Andric /// 5840b57cec5SDimitry Andric /// \param __a 5850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5860b57cec5SDimitry Andric /// \param __b 5870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5880b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 58981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, 59081ad6265SDimitry Andric __m128d __b) { 5910b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric 5940b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5950b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5960b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 5970b57cec5SDimitry Andric /// 5980b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5990b57cec5SDimitry Andric /// 6000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6010b57cec5SDimitry Andric /// 6020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6030b57cec5SDimitry Andric /// 6040b57cec5SDimitry Andric /// \param __a 6050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6060b57cec5SDimitry Andric /// \param __b 6070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6080b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
60981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, 61081ad6265SDimitry Andric __m128d __b) { 6110b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6150b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6160b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 6170b57cec5SDimitry Andric /// 6180b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6190b57cec5SDimitry Andric /// 6200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6210b57cec5SDimitry Andric /// 6220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6230b57cec5SDimitry Andric /// 6240b57cec5SDimitry Andric /// \param __a 6250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6260b57cec5SDimitry Andric /// \param __b 6270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6280b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 62981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, 63081ad6265SDimitry Andric __m128d __b) { 6310b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 6320b57cec5SDimitry Andric } 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6350b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6360b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 6370b57cec5SDimitry Andric /// 6380b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
6390b57cec5SDimitry Andric /// 6400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6410b57cec5SDimitry Andric /// 6420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6430b57cec5SDimitry Andric /// 6440b57cec5SDimitry Andric /// \param __a 6450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6460b57cec5SDimitry Andric /// \param __b 6470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6480b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 64981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, 65081ad6265SDimitry Andric __m128d __b) { 6510b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6550b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 6560b57cec5SDimitry Andric /// 6570b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6580b57cec5SDimitry Andric /// 6590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6600b57cec5SDimitry Andric /// 6610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 6620b57cec5SDimitry Andric /// 6630b57cec5SDimitry Andric /// \param __a 6640b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6650b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6660b57cec5SDimitry Andric /// \param __b 6670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6680b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6690b57cec5SDimitry Andric /// \returns A 128-bit vector. 
The lower 64 bits contains the comparison 6700b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 67181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, 67281ad6265SDimitry Andric __m128d __b) { 6730b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6770b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6780b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 6790b57cec5SDimitry Andric /// the second parameter. 6800b57cec5SDimitry Andric /// 6810b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6820b57cec5SDimitry Andric /// 6830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6840b57cec5SDimitry Andric /// 6850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 6860b57cec5SDimitry Andric /// 6870b57cec5SDimitry Andric /// \param __a 6880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6900b57cec5SDimitry Andric /// \param __b 6910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6920b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6930b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 6940b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
69581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, 69681ad6265SDimitry Andric __m128d __b) { 6970b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7010b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7020b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 7030b57cec5SDimitry Andric /// corresponding value in the second parameter. 7040b57cec5SDimitry Andric /// 7050b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7060b57cec5SDimitry Andric /// 7070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7080b57cec5SDimitry Andric /// 7090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7100b57cec5SDimitry Andric /// 7110b57cec5SDimitry Andric /// \param __a 7120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7130b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7140b57cec5SDimitry Andric /// \param __b 7150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7160b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7170b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7180b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
71981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, 72081ad6265SDimitry Andric __m128d __b) { 7210b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7250b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7260b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 7270b57cec5SDimitry Andric /// in the second parameter. 7280b57cec5SDimitry Andric /// 7290b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7300b57cec5SDimitry Andric /// 7310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7320b57cec5SDimitry Andric /// 7330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7340b57cec5SDimitry Andric /// 7350b57cec5SDimitry Andric /// \param __a 7360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7380b57cec5SDimitry Andric /// \param __b 7390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7400b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7410b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7420b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
74381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, 74481ad6265SDimitry Andric __m128d __b) { 7450b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 7460b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7500b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7510b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 7520b57cec5SDimitry Andric /// corresponding value in the second parameter. 7530b57cec5SDimitry Andric /// 7540b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7550b57cec5SDimitry Andric /// 7560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7570b57cec5SDimitry Andric /// 7580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7590b57cec5SDimitry Andric /// 7600b57cec5SDimitry Andric /// \param __a 7610b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7620b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7630b57cec5SDimitry Andric /// \param __b 7640b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7650b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7660b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7670b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
76881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, 76981ad6265SDimitry Andric __m128d __b) { 7700b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 7710b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7720b57cec5SDimitry Andric } 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7750b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7760b57cec5SDimitry Andric /// the value in the first parameter is "ordered" with respect to the 7770b57cec5SDimitry Andric /// corresponding value in the second parameter. 7780b57cec5SDimitry Andric /// 7790b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 7800b57cec5SDimitry Andric /// of double-precision values are "ordered" with respect to each other if 7810b57cec5SDimitry Andric /// neither value is a NaN. 7820b57cec5SDimitry Andric /// 7830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7840b57cec5SDimitry Andric /// 7850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 7860b57cec5SDimitry Andric /// 7870b57cec5SDimitry Andric /// \param __a 7880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7900b57cec5SDimitry Andric /// \param __b 7910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7920b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7930b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7940b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
79581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, 79681ad6265SDimitry Andric __m128d __b) { 7970b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8010b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8020b57cec5SDimitry Andric /// the value in the first parameter is "unordered" with respect to the 8030b57cec5SDimitry Andric /// corresponding value in the second parameter. 8040b57cec5SDimitry Andric /// 8050b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8060b57cec5SDimitry Andric /// of double-precision values are "unordered" with respect to each other if 8070b57cec5SDimitry Andric /// one or both values are NaN. 8080b57cec5SDimitry Andric /// 8090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8100b57cec5SDimitry Andric /// 8110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 8120b57cec5SDimitry Andric /// instruction. 8130b57cec5SDimitry Andric /// 8140b57cec5SDimitry Andric /// \param __a 8150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8160b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8170b57cec5SDimitry Andric /// \param __b 8180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8190b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8200b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8210b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
82281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, 82381ad6265SDimitry Andric __m128d __b) { 8240b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 8250b57cec5SDimitry Andric } 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8280b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8290b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 8300b57cec5SDimitry Andric /// the second parameter. 8310b57cec5SDimitry Andric /// 8320b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8330b57cec5SDimitry Andric /// 8340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8350b57cec5SDimitry Andric /// 8360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 8370b57cec5SDimitry Andric /// 8380b57cec5SDimitry Andric /// \param __a 8390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8400b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8410b57cec5SDimitry Andric /// \param __b 8420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8430b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8440b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8450b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
84681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, 84781ad6265SDimitry Andric __m128d __b) { 8480b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 8490b57cec5SDimitry Andric } 8500b57cec5SDimitry Andric 8510b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8520b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8530b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 8540b57cec5SDimitry Andric /// value in the second parameter. 8550b57cec5SDimitry Andric /// 8560b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8570b57cec5SDimitry Andric /// 8580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8590b57cec5SDimitry Andric /// 8600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8610b57cec5SDimitry Andric /// 8620b57cec5SDimitry Andric /// \param __a 8630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8640b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8650b57cec5SDimitry Andric /// \param __b 8660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8670b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8680b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8690b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
87081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, 87181ad6265SDimitry Andric __m128d __b) { 8720b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8760b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8770b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 8780b57cec5SDimitry Andric /// corresponding value in the second parameter. 8790b57cec5SDimitry Andric /// 8800b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8810b57cec5SDimitry Andric /// 8820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8830b57cec5SDimitry Andric /// 8840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 8850b57cec5SDimitry Andric /// 8860b57cec5SDimitry Andric /// \param __a 8870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8880b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8890b57cec5SDimitry Andric /// \param __b 8900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8910b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8920b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8930b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
89481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, 89581ad6265SDimitry Andric __m128d __b) { 8960b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 8970b57cec5SDimitry Andric } 8980b57cec5SDimitry Andric 8990b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9000b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9010b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 9020b57cec5SDimitry Andric /// value in the second parameter. 9030b57cec5SDimitry Andric /// 9040b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9050b57cec5SDimitry Andric /// 9060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9070b57cec5SDimitry Andric /// 9080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 9090b57cec5SDimitry Andric /// 9100b57cec5SDimitry Andric /// \param __a 9110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9120b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9130b57cec5SDimitry Andric /// \param __b 9140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9150b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9160b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9170b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
91881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, 91981ad6265SDimitry Andric __m128d __b) { 9200b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 9210b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9220b57cec5SDimitry Andric } 9230b57cec5SDimitry Andric 9240b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9250b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9260b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 9270b57cec5SDimitry Andric /// corresponding value in the second parameter. 9280b57cec5SDimitry Andric /// 9290b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9300b57cec5SDimitry Andric /// 9310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9320b57cec5SDimitry Andric /// 9330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9340b57cec5SDimitry Andric /// 9350b57cec5SDimitry Andric /// \param __a 9360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9380b57cec5SDimitry Andric /// \param __b 9390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9400b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9410b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9420b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
94381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, 94481ad6265SDimitry Andric __m128d __b) { 9450b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 9460b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9470b57cec5SDimitry Andric } 9480b57cec5SDimitry Andric 9490b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9500b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 9510b57cec5SDimitry Andric /// 9520b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9530b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9540b57cec5SDimitry Andric /// 9550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9560b57cec5SDimitry Andric /// 9570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9580b57cec5SDimitry Andric /// 9590b57cec5SDimitry Andric /// \param __a 9600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9610b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9620b57cec5SDimitry Andric /// \param __b 9630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9640b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9650b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9660b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
96781ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, 96881ad6265SDimitry Andric __m128d __b) { 9690b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 9700b57cec5SDimitry Andric } 9710b57cec5SDimitry Andric 9720b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9730b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9740b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 9750b57cec5SDimitry Andric /// the second parameter. 9760b57cec5SDimitry Andric /// 9770b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9780b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9790b57cec5SDimitry Andric /// 9800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9810b57cec5SDimitry Andric /// 9820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9830b57cec5SDimitry Andric /// 9840b57cec5SDimitry Andric /// \param __a 9850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9860b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9870b57cec5SDimitry Andric /// \param __b 9880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9900b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9910b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
99281ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, 99381ad6265SDimitry Andric __m128d __b) { 9940b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9980b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9990b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 10000b57cec5SDimitry Andric /// corresponding value in the second parameter. 10010b57cec5SDimitry Andric /// 10020b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10030b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10040b57cec5SDimitry Andric /// 10050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10060b57cec5SDimitry Andric /// 10070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10080b57cec5SDimitry Andric /// 10090b57cec5SDimitry Andric /// \param __a 10100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10110b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10120b57cec5SDimitry Andric /// \param __b 10130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10140b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10150b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10160b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
101781ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, 101881ad6265SDimitry Andric __m128d __b) { 10190b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 10200b57cec5SDimitry Andric } 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10230b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10240b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 10250b57cec5SDimitry Andric /// in the second parameter. 10260b57cec5SDimitry Andric /// 10270b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10280b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10290b57cec5SDimitry Andric /// 10300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10310b57cec5SDimitry Andric /// 10320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10330b57cec5SDimitry Andric /// 10340b57cec5SDimitry Andric /// \param __a 10350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10360b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10370b57cec5SDimitry Andric /// \param __b 10380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10390b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10400b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10410b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
104281ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, 104381ad6265SDimitry Andric __m128d __b) { 10440b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 10450b57cec5SDimitry Andric } 10460b57cec5SDimitry Andric 10470b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10480b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10490b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 10500b57cec5SDimitry Andric /// corresponding value in the second parameter. 10510b57cec5SDimitry Andric /// 10520b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10530b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10540b57cec5SDimitry Andric /// 10550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10560b57cec5SDimitry Andric /// 10570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10580b57cec5SDimitry Andric /// 10590b57cec5SDimitry Andric /// \param __a 10600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10610b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10620b57cec5SDimitry Andric /// \param __b 10630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10640b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10650b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10660b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
106781ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, 106881ad6265SDimitry Andric __m128d __b) { 10690b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 10700b57cec5SDimitry Andric } 10710b57cec5SDimitry Andric 10720b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10730b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10740b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 10750b57cec5SDimitry Andric /// the second parameter. 10760b57cec5SDimitry Andric /// 10770b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10780b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 10790b57cec5SDimitry Andric /// 10800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10810b57cec5SDimitry Andric /// 10820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10830b57cec5SDimitry Andric /// 10840b57cec5SDimitry Andric /// \param __a 10850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10860b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10870b57cec5SDimitry Andric /// \param __b 10880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10900b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10910b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
109281ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, 109381ad6265SDimitry Andric __m128d __b) { 10940b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 10950b57cec5SDimitry Andric } 10960b57cec5SDimitry Andric 10970b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10980b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. The 10990b57cec5SDimitry Andric /// comparison yields 0 for false, 1 for true. 11000b57cec5SDimitry Andric /// 11010b57cec5SDimitry Andric /// If either of the two lower double-precision values is NaN, 0 is returned. 11020b57cec5SDimitry Andric /// 11030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11040b57cec5SDimitry Andric /// 11050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11060b57cec5SDimitry Andric /// 11070b57cec5SDimitry Andric /// \param __a 11080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11100b57cec5SDimitry Andric /// \param __b 11110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11120b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11130b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11140b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
111581ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, 111681ad6265SDimitry Andric __m128d __b) { 11170b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 11180b57cec5SDimitry Andric } 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11210b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11220b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 11230b57cec5SDimitry Andric /// the second parameter. 11240b57cec5SDimitry Andric /// 11250b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11260b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11270b57cec5SDimitry Andric /// 11280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11290b57cec5SDimitry Andric /// 11300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11310b57cec5SDimitry Andric /// 11320b57cec5SDimitry Andric /// \param __a 11330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11340b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11350b57cec5SDimitry Andric /// \param __b 11360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11380b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11390b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
114081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, 114181ad6265SDimitry Andric __m128d __b) { 11420b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 11430b57cec5SDimitry Andric } 11440b57cec5SDimitry Andric 11450b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11460b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11470b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 11480b57cec5SDimitry Andric /// corresponding value in the second parameter. 11490b57cec5SDimitry Andric /// 11500b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11510b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11520b57cec5SDimitry Andric /// 11530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11540b57cec5SDimitry Andric /// 11550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11560b57cec5SDimitry Andric /// 11570b57cec5SDimitry Andric /// \param __a 11580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11590b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11600b57cec5SDimitry Andric /// \param __b 11610b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11620b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11630b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11640b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
116581ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, 116681ad6265SDimitry Andric __m128d __b) { 11670b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 11680b57cec5SDimitry Andric } 11690b57cec5SDimitry Andric 11700b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11710b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11720b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 11730b57cec5SDimitry Andric /// in the second parameter. 11740b57cec5SDimitry Andric /// 11750b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11760b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11770b57cec5SDimitry Andric /// 11780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11790b57cec5SDimitry Andric /// 11800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11810b57cec5SDimitry Andric /// 11820b57cec5SDimitry Andric /// \param __a 11830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11840b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11850b57cec5SDimitry Andric /// \param __b 11860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11870b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11880b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11890b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
119081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, 119181ad6265SDimitry Andric __m128d __b) { 11920b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 11930b57cec5SDimitry Andric } 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11960b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11970b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 11980b57cec5SDimitry Andric /// corresponding value in the second parameter. 11990b57cec5SDimitry Andric /// 12000b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 12010b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 12020b57cec5SDimitry Andric /// 12030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12040b57cec5SDimitry Andric /// 12050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12060b57cec5SDimitry Andric /// 12070b57cec5SDimitry Andric /// \param __a 12080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12100b57cec5SDimitry Andric /// \param __b 12110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12120b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12130b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12140b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
121581ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, 121681ad6265SDimitry Andric __m128d __b) { 12170b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 12180b57cec5SDimitry Andric } 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12210b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12220b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 12230b57cec5SDimitry Andric /// the second parameter. 12240b57cec5SDimitry Andric /// 12250b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12260b57cec5SDimitry Andric /// double-precision values is NaN, 1 is returned. 12270b57cec5SDimitry Andric /// 12280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12290b57cec5SDimitry Andric /// 12300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12310b57cec5SDimitry Andric /// 12320b57cec5SDimitry Andric /// \param __a 12330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12340b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12350b57cec5SDimitry Andric /// \param __b 12360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12370b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12380b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two 12390b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
124081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, 124181ad6265SDimitry Andric __m128d __b) { 12420b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 12430b57cec5SDimitry Andric } 12440b57cec5SDimitry Andric 12450b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12460b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 12470b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 12480b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 12490b57cec5SDimitry Andric /// 12500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12510b57cec5SDimitry Andric /// 12520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 12530b57cec5SDimitry Andric /// 12540b57cec5SDimitry Andric /// \param __a 12550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 12560b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 12570b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 125881ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { 12590b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 12600b57cec5SDimitry Andric } 12610b57cec5SDimitry Andric 12620b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 12630b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 12640b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 12650b57cec5SDimitry Andric /// elements of the input vector are unused. 
12660b57cec5SDimitry Andric /// 12670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12680b57cec5SDimitry Andric /// 12690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 12700b57cec5SDimitry Andric /// 12710b57cec5SDimitry Andric /// \param __a 12720b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 12730b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 12740b57cec5SDimitry Andric /// upper two elements are unused. 12750b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 127681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { 12770b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 12780b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 12820b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 12830b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 12840b57cec5SDimitry Andric /// 12850b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 12860b57cec5SDimitry Andric /// 12870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12880b57cec5SDimitry Andric /// 12890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 12900b57cec5SDimitry Andric /// 12910b57cec5SDimitry Andric /// \param __a 12920b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 12930b57cec5SDimitry Andric /// converted to double-precision values. 12940b57cec5SDimitry Andric /// 12950b57cec5SDimitry Andric /// The upper two elements are unused. 
12960b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 129781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { 12980b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 12990b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 13030b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 13040b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 13050b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 13060b57cec5SDimitry Andric /// 13070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13080b57cec5SDimitry Andric /// 13090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 13100b57cec5SDimitry Andric /// 13110b57cec5SDimitry Andric /// \param __a 13120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 13130b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 13140b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 131581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { 13160b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 13200b57cec5SDimitry Andric /// into a 32-bit signed integer value. 
13210b57cec5SDimitry Andric /// 13220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13230b57cec5SDimitry Andric /// 13240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 13250b57cec5SDimitry Andric /// 13260b57cec5SDimitry Andric /// \param __a 13270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 13280b57cec5SDimitry Andric /// conversion. 13290b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 133081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { 13310b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 13320b57cec5SDimitry Andric } 13330b57cec5SDimitry Andric 13340b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 13350b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 13360b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 13370b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 13380b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 13390b57cec5SDimitry Andric /// 13400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13410b57cec5SDimitry Andric /// 13420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 13430b57cec5SDimitry Andric /// 13440b57cec5SDimitry Andric /// \param __a 13450b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 13460b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 13470b57cec5SDimitry Andric /// \param __b 13480b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision 13490b57cec5SDimitry Andric /// floating-point element is used in the conversion. 
13500b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 13510b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 13520b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 135381ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, 135481ad6265SDimitry Andric __m128d __b) { 13550b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 13560b57cec5SDimitry Andric } 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 13590b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 13600b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 13610b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 13620b57cec5SDimitry Andric /// 13630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13640b57cec5SDimitry Andric /// 13650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 13660b57cec5SDimitry Andric /// 13670b57cec5SDimitry Andric /// \param __a 13680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 13690b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 13700b57cec5SDimitry Andric /// \param __b 13710b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 13720b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 13730b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 13740b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
137581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, 137681ad6265SDimitry Andric int __b) { 13770b57cec5SDimitry Andric __a[0] = __b; 13780b57cec5SDimitry Andric return __a; 13790b57cec5SDimitry Andric } 13800b57cec5SDimitry Andric 13810b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 13820b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 13830b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 13840b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 13850b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 13860b57cec5SDimitry Andric /// 13870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13880b57cec5SDimitry Andric /// 13890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 13900b57cec5SDimitry Andric /// 13910b57cec5SDimitry Andric /// \param __a 13920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 13930b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 13940b57cec5SDimitry Andric /// \param __b 13950b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 13960b57cec5SDimitry Andric /// floating-point element is used in the conversion. 13970b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 13980b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 13990b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
140081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, 140181ad6265SDimitry Andric __m128 __b) { 14020b57cec5SDimitry Andric __a[0] = __b[0]; 14030b57cec5SDimitry Andric return __a; 14040b57cec5SDimitry Andric } 14050b57cec5SDimitry Andric 14060b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14070b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14080b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. 14090b57cec5SDimitry Andric /// 14100b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14110b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. The upper 14120b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 14130b57cec5SDimitry Andric /// 14140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14150b57cec5SDimitry Andric /// 14160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 14170b57cec5SDimitry Andric /// instruction. 14180b57cec5SDimitry Andric /// 14190b57cec5SDimitry Andric /// \param __a 14200b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14210b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 14220b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 142381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { 14240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 14250b57cec5SDimitry Andric } 14260b57cec5SDimitry Andric 14270b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 14280b57cec5SDimitry Andric /// signed integer value, truncating the result when it is inexact. 
14290b57cec5SDimitry Andric /// 14300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14310b57cec5SDimitry Andric /// 14320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 14330b57cec5SDimitry Andric /// instruction. 14340b57cec5SDimitry Andric /// 14350b57cec5SDimitry Andric /// \param __a 14360b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 14370b57cec5SDimitry Andric /// conversion. 14380b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 143981ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { 14400b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 14410b57cec5SDimitry Andric } 14420b57cec5SDimitry Andric 14430b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14440b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14450b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 14460b57cec5SDimitry Andric /// 14470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14480b57cec5SDimitry Andric /// 14490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 14500b57cec5SDimitry Andric /// 14510b57cec5SDimitry Andric /// \param __a 14520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14530b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 
145481ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { 14550b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 14560b57cec5SDimitry Andric } 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14590b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14600b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 14610b57cec5SDimitry Andric /// 14620b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14630b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. 14640b57cec5SDimitry Andric /// 14650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14660b57cec5SDimitry Andric /// 14670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 14680b57cec5SDimitry Andric /// 14690b57cec5SDimitry Andric /// \param __a 14700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14710b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 147281ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { 14730b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 14740b57cec5SDimitry Andric } 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 14770b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 14780b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 14790b57cec5SDimitry Andric /// 14800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14810b57cec5SDimitry Andric /// 14820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 
14830b57cec5SDimitry Andric /// 14840b57cec5SDimitry Andric /// \param __a 14850b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 14860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 148781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { 14880b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 14890b57cec5SDimitry Andric } 14900b57cec5SDimitry Andric 14910b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 14920b57cec5SDimitry Andric /// a double-precision floating-point value. 14930b57cec5SDimitry Andric /// 14940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14950b57cec5SDimitry Andric /// 14960b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 14970b57cec5SDimitry Andric /// 14980b57cec5SDimitry Andric /// \param __a 14990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 15000b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 15010b57cec5SDimitry Andric /// bits of \a __a. 150281ad6265SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { 15030b57cec5SDimitry Andric return __a[0]; 15040b57cec5SDimitry Andric } 15050b57cec5SDimitry Andric 15060b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 15070b57cec5SDimitry Andric /// memory location. 15080b57cec5SDimitry Andric /// 15090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15100b57cec5SDimitry Andric /// 15110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 15120b57cec5SDimitry Andric /// 15130b57cec5SDimitry Andric /// \param __dp 15140b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. 
The address of the memory 15150b57cec5SDimitry Andric /// location has to be 16-byte aligned. 15160b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 151781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { 1518480093f4SDimitry Andric return *(const __m128d *)__dp; 15190b57cec5SDimitry Andric } 15200b57cec5SDimitry Andric 15210b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 15220b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 15230b57cec5SDimitry Andric /// [2 x double]. 15240b57cec5SDimitry Andric /// 15250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15260b57cec5SDimitry Andric /// 15270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 15280b57cec5SDimitry Andric /// 15290b57cec5SDimitry Andric /// \param __dp 15300b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 15310b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 15320b57cec5SDimitry Andric /// duplicated values. 
153381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { 15340b57cec5SDimitry Andric struct __mm_load1_pd_struct { 15350b57cec5SDimitry Andric double __u; 15360b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1537480093f4SDimitry Andric double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u; 15380b57cec5SDimitry Andric return __extension__(__m128d){__u, __u}; 15390b57cec5SDimitry Andric } 15400b57cec5SDimitry Andric 15410b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 15420b57cec5SDimitry Andric 15430b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 15440b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 15450b57cec5SDimitry Andric /// 15460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15470b57cec5SDimitry Andric /// 15480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 15490b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 15500b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 15510b57cec5SDimitry Andric /// 15520b57cec5SDimitry Andric /// \param __dp 15530b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 15540b57cec5SDimitry Andric /// loaded in reverse order. 15550b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 15560b57cec5SDimitry Andric /// values. 
155781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { 1558480093f4SDimitry Andric __m128d __u = *(const __m128d *)__dp; 15590b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 15600b57cec5SDimitry Andric } 15610b57cec5SDimitry Andric 15620b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 15630b57cec5SDimitry Andric /// unaligned memory location. 15640b57cec5SDimitry Andric /// 15650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15660b57cec5SDimitry Andric /// 15670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 15680b57cec5SDimitry Andric /// 15690b57cec5SDimitry Andric /// \param __dp 15700b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 15710b57cec5SDimitry Andric /// location does not have to be aligned. 15720b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 157381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { 15740b57cec5SDimitry Andric struct __loadu_pd { 15750b57cec5SDimitry Andric __m128d_u __v; 15760b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1577480093f4SDimitry Andric return ((const struct __loadu_pd *)__dp)->__v; 15780b57cec5SDimitry Andric } 15790b57cec5SDimitry Andric 15800b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 15810b57cec5SDimitry Andric /// vector and clears the upper element. 15820b57cec5SDimitry Andric /// 15830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15840b57cec5SDimitry Andric /// 15850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 15860b57cec5SDimitry Andric /// 15870b57cec5SDimitry Andric /// \param __a 15880b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
The address of the memory 15890b57cec5SDimitry Andric /// location does not have to be aligned. 15900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 159181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { 15920b57cec5SDimitry Andric struct __loadu_si64 { 15930b57cec5SDimitry Andric long long __v; 15940b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1595480093f4SDimitry Andric long long __u = ((const struct __loadu_si64 *)__a)->__v; 15960b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__u, 0LL}; 15970b57cec5SDimitry Andric } 15980b57cec5SDimitry Andric 15990b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 16000b57cec5SDimitry Andric /// vector and clears the upper element. 16010b57cec5SDimitry Andric /// 16020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16030b57cec5SDimitry Andric /// 16040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 16050b57cec5SDimitry Andric /// 16060b57cec5SDimitry Andric /// \param __a 16070b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 16080b57cec5SDimitry Andric /// location does not have to be aligned. 16090b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
161081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) { 16110b57cec5SDimitry Andric struct __loadu_si32 { 16120b57cec5SDimitry Andric int __v; 16130b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1614480093f4SDimitry Andric int __u = ((const struct __loadu_si32 *)__a)->__v; 16150b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__u, 0, 0, 0}; 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 16190b57cec5SDimitry Andric /// vector and clears the upper element. 16200b57cec5SDimitry Andric /// 16210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16220b57cec5SDimitry Andric /// 16230b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 16240b57cec5SDimitry Andric /// 16250b57cec5SDimitry Andric /// \param __a 16260b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 16270b57cec5SDimitry Andric /// location does not have to be aligned. 16280b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 162981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) { 16300b57cec5SDimitry Andric struct __loadu_si16 { 16310b57cec5SDimitry Andric short __v; 16320b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1633480093f4SDimitry Andric short __u = ((const struct __loadu_si16 *)__a)->__v; 16340b57cec5SDimitry Andric return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric 16370b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 16380b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 
16390b57cec5SDimitry Andric /// 16400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16410b57cec5SDimitry Andric /// 16420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 16430b57cec5SDimitry Andric /// 16440b57cec5SDimitry Andric /// \param __dp 16450b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 16460b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 16470b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 164881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { 16490b57cec5SDimitry Andric struct __mm_load_sd_struct { 16500b57cec5SDimitry Andric double __u; 16510b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1652480093f4SDimitry Andric double __u = ((const struct __mm_load_sd_struct *)__dp)->__u; 16530b57cec5SDimitry Andric return __extension__(__m128d){__u, 0}; 16540b57cec5SDimitry Andric } 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 16570b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 16580b57cec5SDimitry Andric /// bits of the first operand. 16590b57cec5SDimitry Andric /// 16600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16610b57cec5SDimitry Andric /// 16620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 16630b57cec5SDimitry Andric /// 16640b57cec5SDimitry Andric /// \param __a 16650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 16660b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 
16670b57cec5SDimitry Andric /// \param __dp 16680b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 16690b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 16700b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 16710b57cec5SDimitry Andric /// to be aligned. 16720b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 167381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, 167481ad6265SDimitry Andric double const *__dp) { 16750b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 16760b57cec5SDimitry Andric double __u; 16770b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1678480093f4SDimitry Andric double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u; 16790b57cec5SDimitry Andric return __extension__(__m128d){__a[0], __u}; 16800b57cec5SDimitry Andric } 16810b57cec5SDimitry Andric 16820b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 16830b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 16840b57cec5SDimitry Andric /// high-order bits of the first operand. 16850b57cec5SDimitry Andric /// 16860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16870b57cec5SDimitry Andric /// 16880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 16890b57cec5SDimitry Andric /// 16900b57cec5SDimitry Andric /// \param __a 16910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 16920b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 16930b57cec5SDimitry Andric /// \param __dp 16940b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 16950b57cec5SDimitry Andric /// floating-point value that is loaded. 
The loaded value is written to bits 16960b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 16970b57cec5SDimitry Andric /// be aligned. 16980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 169981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, 170081ad6265SDimitry Andric double const *__dp) { 17010b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 17020b57cec5SDimitry Andric double __u; 17030b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1704480093f4SDimitry Andric double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u; 17050b57cec5SDimitry Andric return __extension__(__m128d){__u, __a[1]}; 17060b57cec5SDimitry Andric } 17070b57cec5SDimitry Andric 17080b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 17090b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 17100b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 17110b57cec5SDimitry Andric /// actually used. 17120b57cec5SDimitry Andric /// 17130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17140b57cec5SDimitry Andric /// 17150b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 17160b57cec5SDimitry Andric /// 17170b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 17180b57cec5SDimitry Andric /// content. 171981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { 17200b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 17210b57cec5SDimitry Andric } 17220b57cec5SDimitry Andric 17230b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. 
The lower 17240b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 17250b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 17260b57cec5SDimitry Andric /// 17270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17280b57cec5SDimitry Andric /// 17290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 17300b57cec5SDimitry Andric /// 17310b57cec5SDimitry Andric /// \param __w 17320b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17330b57cec5SDimitry Andric /// bits of the result. 17340b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 17350b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 17360b57cec5SDimitry Andric /// set to zero. 173781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { 17380b57cec5SDimitry Andric return __extension__(__m128d){__w, 0}; 17390b57cec5SDimitry Andric } 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17420b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17430b57cec5SDimitry Andric /// specified double-precision floating-point value. 17440b57cec5SDimitry Andric /// 17450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17460b57cec5SDimitry Andric /// 17470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 17480b57cec5SDimitry Andric /// 17490b57cec5SDimitry Andric /// \param __w 17500b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 17510b57cec5SDimitry Andric /// element of the result. 17520b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
175381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { 17540b57cec5SDimitry Andric return __extension__(__m128d){__w, __w}; 17550b57cec5SDimitry Andric } 17560b57cec5SDimitry Andric 17570b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17580b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17590b57cec5SDimitry Andric /// specified double-precision floating-point value. 17600b57cec5SDimitry Andric /// 17610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17620b57cec5SDimitry Andric /// 17630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 17640b57cec5SDimitry Andric /// 17650b57cec5SDimitry Andric /// \param __w 17660b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 17670b57cec5SDimitry Andric /// element of the result. 17680b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 176981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { 17700b57cec5SDimitry Andric return _mm_set1_pd(__w); 17710b57cec5SDimitry Andric } 17720b57cec5SDimitry Andric 17730b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 17740b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 17750b57cec5SDimitry Andric /// 17760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17770b57cec5SDimitry Andric /// 17780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 17790b57cec5SDimitry Andric /// 17800b57cec5SDimitry Andric /// \param __w 17810b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 17820b57cec5SDimitry Andric /// bits of the result. 
17830b57cec5SDimitry Andric /// \param __x 17840b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17850b57cec5SDimitry Andric /// bits of the result. 17860b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 178781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, 178881ad6265SDimitry Andric double __x) { 17890b57cec5SDimitry Andric return __extension__(__m128d){__x, __w}; 17900b57cec5SDimitry Andric } 17910b57cec5SDimitry Andric 17920b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 17930b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 17940b57cec5SDimitry Andric /// floating-point values. 17950b57cec5SDimitry Andric /// 17960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17970b57cec5SDimitry Andric /// 17980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 17990b57cec5SDimitry Andric /// 18000b57cec5SDimitry Andric /// \param __w 18010b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18020b57cec5SDimitry Andric /// bits of the result. 18030b57cec5SDimitry Andric /// \param __x 18040b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18050b57cec5SDimitry Andric /// bits of the result. 18060b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 180781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, 180881ad6265SDimitry Andric double __x) { 18090b57cec5SDimitry Andric return __extension__(__m128d){__w, __x}; 18100b57cec5SDimitry Andric } 18110b57cec5SDimitry Andric 18120b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18130b57cec5SDimitry Andric /// initialized to zero. 
18140b57cec5SDimitry Andric /// 18150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18160b57cec5SDimitry Andric /// 18170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 18180b57cec5SDimitry Andric /// 18190b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 18200b57cec5SDimitry Andric /// all elements set to zero. 182181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { 1822*bdd1243dSDimitry Andric return __extension__(__m128d){0.0, 0.0}; 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric 18250b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 18260b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 18270b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 18280b57cec5SDimitry Andric /// 18290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18300b57cec5SDimitry Andric /// 18310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 18320b57cec5SDimitry Andric /// 18330b57cec5SDimitry Andric /// \param __a 18340b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 18350b57cec5SDimitry Andric /// upper 64 bits of the result. 18360b57cec5SDimitry Andric /// \param __b 18370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 18380b57cec5SDimitry Andric /// lower 64 bits of the result. 18390b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
184081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, 184181ad6265SDimitry Andric __m128d __b) { 18420b57cec5SDimitry Andric __a[0] = __b[0]; 18430b57cec5SDimitry Andric return __a; 18440b57cec5SDimitry Andric } 18450b57cec5SDimitry Andric 18460b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 18470b57cec5SDimitry Andric /// memory location. 18480b57cec5SDimitry Andric /// 18490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18500b57cec5SDimitry Andric /// 18510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 18520b57cec5SDimitry Andric /// 18530b57cec5SDimitry Andric /// \param __dp 18540b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 18550b57cec5SDimitry Andric /// \param __a 18560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 185781ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, 185881ad6265SDimitry Andric __m128d __a) { 18590b57cec5SDimitry Andric struct __mm_store_sd_struct { 18600b57cec5SDimitry Andric double __u; 18610b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 18620b57cec5SDimitry Andric ((struct __mm_store_sd_struct *)__dp)->__u = __a[0]; 18630b57cec5SDimitry Andric } 18640b57cec5SDimitry Andric 18650b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 18660b57cec5SDimitry Andric /// [2 x double] to a memory location. 18670b57cec5SDimitry Andric /// 18680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18690b57cec5SDimitry Andric /// 18700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 
18710b57cec5SDimitry Andric /// 18720b57cec5SDimitry Andric /// \param __dp 18730b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 18740b57cec5SDimitry Andric /// double-precision values. 18750b57cec5SDimitry Andric /// \param __a 18760b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 18770b57cec5SDimitry Andric /// moved. 187881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, 187981ad6265SDimitry Andric __m128d __a) { 18800b57cec5SDimitry Andric *(__m128d *)__dp = __a; 18810b57cec5SDimitry Andric } 18820b57cec5SDimitry Andric 18830b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 18840b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 18850b57cec5SDimitry Andric /// 18860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18870b57cec5SDimitry Andric /// 18880b57cec5SDimitry Andric /// This intrinsic corresponds to the 18890b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 18900b57cec5SDimitry Andric /// 18910b57cec5SDimitry Andric /// \param __dp 18920b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 18930b57cec5SDimitry Andric /// values. 18940b57cec5SDimitry Andric /// \param __a 18950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 18960b57cec5SDimitry Andric /// of the values in \a __dp. 
189781ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, 189881ad6265SDimitry Andric __m128d __a) { 18990b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 19000b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 19010b57cec5SDimitry Andric } 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19040b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 19050b57cec5SDimitry Andric /// 19060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19070b57cec5SDimitry Andric /// 19080b57cec5SDimitry Andric /// This intrinsic corresponds to the 19090b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19100b57cec5SDimitry Andric /// 19110b57cec5SDimitry Andric /// \param __dp 19120b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19130b57cec5SDimitry Andric /// values. 19140b57cec5SDimitry Andric /// \param __a 19150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19160b57cec5SDimitry Andric /// of the values in \a __dp. 191781ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, 191881ad6265SDimitry Andric __m128d __a) { 19190b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 19200b57cec5SDimitry Andric } 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 19230b57cec5SDimitry Andric /// location. 19240b57cec5SDimitry Andric /// 19250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19260b57cec5SDimitry Andric /// 19270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 19280b57cec5SDimitry Andric /// 19290b57cec5SDimitry Andric /// \param __dp 19300b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. 
The address of the memory 19310b57cec5SDimitry Andric /// location does not have to be aligned. 19320b57cec5SDimitry Andric /// \param __a 19330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 193481ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, 193581ad6265SDimitry Andric __m128d __a) { 19360b57cec5SDimitry Andric struct __storeu_pd { 19370b57cec5SDimitry Andric __m128d_u __v; 19380b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19390b57cec5SDimitry Andric ((struct __storeu_pd *)__dp)->__v = __a; 19400b57cec5SDimitry Andric } 19410b57cec5SDimitry Andric 19420b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 19430b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 19440b57cec5SDimitry Andric /// 19450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19460b57cec5SDimitry Andric /// 19470b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 19480b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 19490b57cec5SDimitry Andric /// 19500b57cec5SDimitry Andric /// \param __dp 19510b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 19520b57cec5SDimitry Andric /// double-precision values. 19530b57cec5SDimitry Andric /// \param __a 19540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 19550b57cec5SDimitry Andric /// stored. 
195681ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, 195781ad6265SDimitry Andric __m128d __a) { 19580b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 19590b57cec5SDimitry Andric *(__m128d *)__dp = __a; 19600b57cec5SDimitry Andric } 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 19630b57cec5SDimitry Andric /// memory location. 19640b57cec5SDimitry Andric /// 19650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19660b57cec5SDimitry Andric /// 19670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 19680b57cec5SDimitry Andric /// 19690b57cec5SDimitry Andric /// \param __dp 19700b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 19710b57cec5SDimitry Andric /// \param __a 19720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 197381ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, 197481ad6265SDimitry Andric __m128d __a) { 19750b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 19760b57cec5SDimitry Andric double __u; 19770b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19780b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1]; 19790b57cec5SDimitry Andric } 19800b57cec5SDimitry Andric 19810b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 19820b57cec5SDimitry Andric /// memory location. 19830b57cec5SDimitry Andric /// 19840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19850b57cec5SDimitry Andric /// 19860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 19870b57cec5SDimitry Andric /// 19880b57cec5SDimitry Andric /// \param __dp 19890b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
19900b57cec5SDimitry Andric /// \param __a 19910b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 199281ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, 199381ad6265SDimitry Andric __m128d __a) { 19940b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 19950b57cec5SDimitry Andric double __u; 19960b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19970b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0]; 19980b57cec5SDimitry Andric } 19990b57cec5SDimitry Andric 20000b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 20010b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 20020b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 20030b57cec5SDimitry Andric /// 20040b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20050b57cec5SDimitry Andric /// 20060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20070b57cec5SDimitry Andric /// 20080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 20090b57cec5SDimitry Andric /// 20100b57cec5SDimitry Andric /// \param __a 20110b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20120b57cec5SDimitry Andric /// \param __b 20130b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20140b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 20150b57cec5SDimitry Andric /// parameters. 
201681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, 201781ad6265SDimitry Andric __m128i __b) { 20180b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 20190b57cec5SDimitry Andric } 20200b57cec5SDimitry Andric 20210b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 20220b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 20230b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 20240b57cec5SDimitry Andric /// 20250b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20260b57cec5SDimitry Andric /// 20270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20280b57cec5SDimitry Andric /// 20290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 20300b57cec5SDimitry Andric /// 20310b57cec5SDimitry Andric /// \param __a 20320b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20330b57cec5SDimitry Andric /// \param __b 20340b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20350b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 20360b57cec5SDimitry Andric /// parameters. 203781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, 203881ad6265SDimitry Andric __m128i __b) { 20390b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 20400b57cec5SDimitry Andric } 20410b57cec5SDimitry Andric 20420b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 20430b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 20440b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 20450b57cec5SDimitry Andric /// 20460b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 
20470b57cec5SDimitry Andric /// 20480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20490b57cec5SDimitry Andric /// 20500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 20510b57cec5SDimitry Andric /// 20520b57cec5SDimitry Andric /// \param __a 20530b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20540b57cec5SDimitry Andric /// \param __b 20550b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20560b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 20570b57cec5SDimitry Andric /// parameters. 205881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, 205981ad6265SDimitry Andric __m128i __b) { 20600b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 20610b57cec5SDimitry Andric } 20620b57cec5SDimitry Andric 20630b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 20640b57cec5SDimitry Andric /// lower 64 bits of the sum. 20650b57cec5SDimitry Andric /// 20660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20670b57cec5SDimitry Andric /// 20680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 20690b57cec5SDimitry Andric /// 20700b57cec5SDimitry Andric /// \param __a 20710b57cec5SDimitry Andric /// A 64-bit integer. 20720b57cec5SDimitry Andric /// \param __b 20730b57cec5SDimitry Andric /// A 64-bit integer. 20740b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
207581ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, 207681ad6265SDimitry Andric __m64 __b) { 20770b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 20780b57cec5SDimitry Andric } 20790b57cec5SDimitry Andric 20800b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 20810b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 20820b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 20830b57cec5SDimitry Andric /// 20840b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20850b57cec5SDimitry Andric /// 20860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20870b57cec5SDimitry Andric /// 20880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 20890b57cec5SDimitry Andric /// 20900b57cec5SDimitry Andric /// \param __a 20910b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 20920b57cec5SDimitry Andric /// \param __b 20930b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 20940b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 20950b57cec5SDimitry Andric /// parameters. 209681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, 209781ad6265SDimitry Andric __m128i __b) { 20980b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 20990b57cec5SDimitry Andric } 21000b57cec5SDimitry Andric 21010b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21020b57cec5SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element of 21030b57cec5SDimitry Andric /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are 21040b57cec5SDimitry Andric /// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80. 
21050b57cec5SDimitry Andric /// 21060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21070b57cec5SDimitry Andric /// 21080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 21090b57cec5SDimitry Andric /// 21100b57cec5SDimitry Andric /// \param __a 21110b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21120b57cec5SDimitry Andric /// \param __b 21130b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21140b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 21150b57cec5SDimitry Andric /// both parameters. 211681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, 211781ad6265SDimitry Andric __m128i __b) { 211881ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); 21190b57cec5SDimitry Andric } 21200b57cec5SDimitry Andric 21210b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21220b57cec5SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element of 21230b57cec5SDimitry Andric /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF 21240b57cec5SDimitry Andric /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 21250b57cec5SDimitry Andric /// 0x8000. 21260b57cec5SDimitry Andric /// 21270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21280b57cec5SDimitry Andric /// 21290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 21300b57cec5SDimitry Andric /// 21310b57cec5SDimitry Andric /// \param __a 21320b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 21330b57cec5SDimitry Andric /// \param __b 21340b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
21350b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 21360b57cec5SDimitry Andric /// both parameters. 213781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, 213881ad6265SDimitry Andric __m128i __b) { 213981ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); 21400b57cec5SDimitry Andric } 21410b57cec5SDimitry Andric 21420b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21430b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 21440b57cec5SDimitry Andric /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF 21450b57cec5SDimitry Andric /// are saturated to 0xFF. Negative sums are saturated to 0x00. 21460b57cec5SDimitry Andric /// 21470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21480b57cec5SDimitry Andric /// 21490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 21500b57cec5SDimitry Andric /// 21510b57cec5SDimitry Andric /// \param __a 21520b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21530b57cec5SDimitry Andric /// \param __b 21540b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21550b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 21560b57cec5SDimitry Andric /// of both parameters. 
215781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, 215881ad6265SDimitry Andric __m128i __b) { 215981ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); 21600b57cec5SDimitry Andric } 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21630b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 21640b57cec5SDimitry Andric /// of a 128-bit result vector of [8 x i16]. Positive sums greater than 21650b57cec5SDimitry Andric /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. 21660b57cec5SDimitry Andric /// 21670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21680b57cec5SDimitry Andric /// 21690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 21700b57cec5SDimitry Andric /// 21710b57cec5SDimitry Andric /// \param __a 21720b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 21730b57cec5SDimitry Andric /// \param __b 21740b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 21750b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 21760b57cec5SDimitry Andric /// of both parameters. 217781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, 217881ad6265SDimitry Andric __m128i __b) { 217981ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); 21800b57cec5SDimitry Andric } 21810b57cec5SDimitry Andric 2182480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 21830b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 21840b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
21850b57cec5SDimitry Andric /// 21860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21870b57cec5SDimitry Andric /// 21880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 21890b57cec5SDimitry Andric /// 21900b57cec5SDimitry Andric /// \param __a 21910b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21920b57cec5SDimitry Andric /// \param __b 21930b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21940b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 21950b57cec5SDimitry Andric /// averages of both parameters. 219681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, 219781ad6265SDimitry Andric __m128i __b) { 21980b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 21990b57cec5SDimitry Andric } 22000b57cec5SDimitry Andric 2201480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 22020b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 22030b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22040b57cec5SDimitry Andric /// 22050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22060b57cec5SDimitry Andric /// 22070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 22080b57cec5SDimitry Andric /// 22090b57cec5SDimitry Andric /// \param __a 22100b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22110b57cec5SDimitry Andric /// \param __b 22120b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22130b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 22140b57cec5SDimitry Andric /// averages of both parameters. 
221581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, 221681ad6265SDimitry Andric __m128i __b) { 22170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 22180b57cec5SDimitry Andric } 22190b57cec5SDimitry Andric 22200b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 22210b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 22220b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 22230b57cec5SDimitry Andric /// [4 x i32] vector. 22240b57cec5SDimitry Andric /// 22250b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 22260b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 22270b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 22280b57cec5SDimitry Andric /// of the result. 22290b57cec5SDimitry Andric /// 22300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22310b57cec5SDimitry Andric /// 22320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 22330b57cec5SDimitry Andric /// 22340b57cec5SDimitry Andric /// \param __a 22350b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22360b57cec5SDimitry Andric /// \param __b 22370b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22380b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 22390b57cec5SDimitry Andric /// of both parameters. 
224081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, 224181ad6265SDimitry Andric __m128i __b) { 22420b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 22430b57cec5SDimitry Andric } 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 22460b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 22470b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22480b57cec5SDimitry Andric /// 22490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22500b57cec5SDimitry Andric /// 22510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 22520b57cec5SDimitry Andric /// 22530b57cec5SDimitry Andric /// \param __a 22540b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22550b57cec5SDimitry Andric /// \param __b 22560b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22570b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 22580b57cec5SDimitry Andric /// each comparison. 225981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, 226081ad6265SDimitry Andric __m128i __b) { 226104eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); 22620b57cec5SDimitry Andric } 22630b57cec5SDimitry Andric 22640b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 22650b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 22660b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
22670b57cec5SDimitry Andric /// 22680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22690b57cec5SDimitry Andric /// 22700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 22710b57cec5SDimitry Andric /// 22720b57cec5SDimitry Andric /// \param __a 22730b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22740b57cec5SDimitry Andric /// \param __b 22750b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22760b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 22770b57cec5SDimitry Andric /// each comparison. 227881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, 227981ad6265SDimitry Andric __m128i __b) { 228004eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric 22830b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 22840b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 22850b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22860b57cec5SDimitry Andric /// 22870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22880b57cec5SDimitry Andric /// 22890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 22900b57cec5SDimitry Andric /// 22910b57cec5SDimitry Andric /// \param __a 22920b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22930b57cec5SDimitry Andric /// \param __b 22940b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22950b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 22960b57cec5SDimitry Andric /// each comparison. 
229781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, 229881ad6265SDimitry Andric __m128i __b) { 229904eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); 23000b57cec5SDimitry Andric } 23010b57cec5SDimitry Andric 23020b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 23030b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 23040b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 23050b57cec5SDimitry Andric /// 23060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23070b57cec5SDimitry Andric /// 23080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 23090b57cec5SDimitry Andric /// 23100b57cec5SDimitry Andric /// \param __a 23110b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23120b57cec5SDimitry Andric /// \param __b 23130b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23140b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 23150b57cec5SDimitry Andric /// each comparison. 231681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, 231781ad6265SDimitry Andric __m128i __b) { 231804eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); 23190b57cec5SDimitry Andric } 23200b57cec5SDimitry Andric 23210b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 23220b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23230b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 
23240b57cec5SDimitry Andric /// 23250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23260b57cec5SDimitry Andric /// 23270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 23280b57cec5SDimitry Andric /// 23290b57cec5SDimitry Andric /// \param __a 23300b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23310b57cec5SDimitry Andric /// \param __b 23320b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23330b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 23340b57cec5SDimitry Andric /// each of the eight 32-bit products. 233581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, 233681ad6265SDimitry Andric __m128i __b) { 23370b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 23380b57cec5SDimitry Andric } 23390b57cec5SDimitry Andric 23400b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 23410b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23420b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 23430b57cec5SDimitry Andric /// 23440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23450b57cec5SDimitry Andric /// 23460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 23470b57cec5SDimitry Andric /// 23480b57cec5SDimitry Andric /// \param __a 23490b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23500b57cec5SDimitry Andric /// \param __b 23510b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23520b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 23530b57cec5SDimitry Andric /// of each of the eight 32-bit products. 
235481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, 235581ad6265SDimitry Andric __m128i __b) { 23560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 23570b57cec5SDimitry Andric } 23580b57cec5SDimitry Andric 23590b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 23600b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 23610b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 23620b57cec5SDimitry Andric /// 23630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23640b57cec5SDimitry Andric /// 23650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 23660b57cec5SDimitry Andric /// 23670b57cec5SDimitry Andric /// \param __a 23680b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23690b57cec5SDimitry Andric /// \param __b 23700b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23710b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 23720b57cec5SDimitry Andric /// each of the eight 32-bit products. 237381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, 237481ad6265SDimitry Andric __m128i __b) { 23750b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 23760b57cec5SDimitry Andric } 23770b57cec5SDimitry Andric 23780b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 23790b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 23800b57cec5SDimitry Andric /// product. 23810b57cec5SDimitry Andric /// 23820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23830b57cec5SDimitry Andric /// 23840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 
23850b57cec5SDimitry Andric /// 23860b57cec5SDimitry Andric /// \param __a 23870b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 23880b57cec5SDimitry Andric /// \param __b 23890b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 23900b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 239181ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, 239281ad6265SDimitry Andric __m64 __b) { 23930b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 23940b57cec5SDimitry Andric } 23950b57cec5SDimitry Andric 23960b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 23970b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 23980b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 23990b57cec5SDimitry Andric /// 24000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24010b57cec5SDimitry Andric /// 24020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 24030b57cec5SDimitry Andric /// 24040b57cec5SDimitry Andric /// \param __a 24050b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 24060b57cec5SDimitry Andric /// \param __b 24070b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 24080b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 
240981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, 241081ad6265SDimitry Andric __m128i __b) { 24110b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 24120b57cec5SDimitry Andric } 24130b57cec5SDimitry Andric 24140b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 24150b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 24160b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 24170b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 24180b57cec5SDimitry Andric /// [2 x i64] vector. 24190b57cec5SDimitry Andric /// 24200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24210b57cec5SDimitry Andric /// 24220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 24230b57cec5SDimitry Andric /// 24240b57cec5SDimitry Andric /// \param __a 24250b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24260b57cec5SDimitry Andric /// \param __b 24270b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24280b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 24290b57cec5SDimitry Andric /// differences between both operands. 243081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, 243181ad6265SDimitry Andric __m128i __b) { 24320b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 24330b57cec5SDimitry Andric } 24340b57cec5SDimitry Andric 24350b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 
24360b57cec5SDimitry Andric /// 24370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24380b57cec5SDimitry Andric /// 24390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 24400b57cec5SDimitry Andric /// 24410b57cec5SDimitry Andric /// \param __a 24420b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24430b57cec5SDimitry Andric /// \param __b 24440b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24450b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24460b57cec5SDimitry Andric /// in the operands. 244781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, 244881ad6265SDimitry Andric __m128i __b) { 24490b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 24500b57cec5SDimitry Andric } 24510b57cec5SDimitry Andric 24520b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 24530b57cec5SDimitry Andric /// 24540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24550b57cec5SDimitry Andric /// 24560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 24570b57cec5SDimitry Andric /// 24580b57cec5SDimitry Andric /// \param __a 24590b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24600b57cec5SDimitry Andric /// \param __b 24610b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24620b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24630b57cec5SDimitry Andric /// in the operands. 
246481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, 246581ad6265SDimitry Andric __m128i __b) { 24660b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 24670b57cec5SDimitry Andric } 24680b57cec5SDimitry Andric 24690b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 24700b57cec5SDimitry Andric /// 24710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24720b57cec5SDimitry Andric /// 24730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 24740b57cec5SDimitry Andric /// 24750b57cec5SDimitry Andric /// \param __a 24760b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24770b57cec5SDimitry Andric /// \param __b 24780b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24790b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24800b57cec5SDimitry Andric /// in the operands. 248181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, 248281ad6265SDimitry Andric __m128i __b) { 24830b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 24840b57cec5SDimitry Andric } 24850b57cec5SDimitry Andric 24860b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 24870b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 24880b57cec5SDimitry Andric /// 24890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24900b57cec5SDimitry Andric /// 24910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 24920b57cec5SDimitry Andric /// 24930b57cec5SDimitry Andric /// \param __a 24940b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 24950b57cec5SDimitry Andric /// \param __b 24960b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 
24970b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 24980b57cec5SDimitry Andric /// the operands. 249981ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, 250081ad6265SDimitry Andric __m64 __b) { 25010b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 25020b57cec5SDimitry Andric } 25030b57cec5SDimitry Andric 25040b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 25050b57cec5SDimitry Andric /// 25060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25070b57cec5SDimitry Andric /// 25080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 25090b57cec5SDimitry Andric /// 25100b57cec5SDimitry Andric /// \param __a 25110b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25120b57cec5SDimitry Andric /// \param __b 25130b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25150b57cec5SDimitry Andric /// in the operands. 251681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, 251781ad6265SDimitry Andric __m128i __b) { 25180b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 25190b57cec5SDimitry Andric } 25200b57cec5SDimitry Andric 25210b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and 25220b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 25230b57cec5SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences less 25240b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. 
25250b57cec5SDimitry Andric /// 25260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25270b57cec5SDimitry Andric /// 25280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 25290b57cec5SDimitry Andric /// 25300b57cec5SDimitry Andric /// \param __a 25310b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25320b57cec5SDimitry Andric /// \param __b 25330b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25340b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25350b57cec5SDimitry Andric /// in the operands. 253681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, 253781ad6265SDimitry Andric __m128i __b) { 253881ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); 25390b57cec5SDimitry Andric } 25400b57cec5SDimitry Andric 25410b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and 25420b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 25430b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 25440b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 25450b57cec5SDimitry Andric /// 25460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25470b57cec5SDimitry Andric /// 25480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 25490b57cec5SDimitry Andric /// 25500b57cec5SDimitry Andric /// \param __a 25510b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25520b57cec5SDimitry Andric /// \param __b 25530b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 
25540b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25550b57cec5SDimitry Andric /// in the operands. 255681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, 255781ad6265SDimitry Andric __m128i __b) { 255881ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); 25590b57cec5SDimitry Andric } 25600b57cec5SDimitry Andric 25610b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input 25620b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 25630b57cec5SDimitry Andric /// destination. Differences less than 0x00 are saturated to 0x00. 25640b57cec5SDimitry Andric /// 25650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25660b57cec5SDimitry Andric /// 25670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 25680b57cec5SDimitry Andric /// 25690b57cec5SDimitry Andric /// \param __a 25700b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25710b57cec5SDimitry Andric /// \param __b 25720b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25730b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 25740b57cec5SDimitry Andric /// differences of the values in the operands. 257581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, 257681ad6265SDimitry Andric __m128i __b) { 257781ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); 25780b57cec5SDimitry Andric } 25790b57cec5SDimitry Andric 25800b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input 25810b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 25820b57cec5SDimitry Andric /// destination. 
Differences less than 0x0000 are saturated to 0x0000. 25830b57cec5SDimitry Andric /// 25840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25850b57cec5SDimitry Andric /// 25860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 25870b57cec5SDimitry Andric /// 25880b57cec5SDimitry Andric /// \param __a 25890b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25900b57cec5SDimitry Andric /// \param __b 25910b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 25930b57cec5SDimitry Andric /// differences of the values in the operands. 259481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, 259581ad6265SDimitry Andric __m128i __b) { 259681ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); 25970b57cec5SDimitry Andric } 25980b57cec5SDimitry Andric 25990b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 26000b57cec5SDimitry Andric /// 26010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26020b57cec5SDimitry Andric /// 26030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 26040b57cec5SDimitry Andric /// 26050b57cec5SDimitry Andric /// \param __a 26060b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26070b57cec5SDimitry Andric /// \param __b 26080b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26090b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 26100b57cec5SDimitry Andric /// in both operands. 
261181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, 261281ad6265SDimitry Andric __m128i __b) { 26130b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 26140b57cec5SDimitry Andric } 26150b57cec5SDimitry Andric 26160b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 26170b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 26180b57cec5SDimitry Andric /// 26190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26200b57cec5SDimitry Andric /// 26210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 26220b57cec5SDimitry Andric /// 26230b57cec5SDimitry Andric /// \param __a 26240b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. The one's complement 26250b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 26260b57cec5SDimitry Andric /// \param __b 26270b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 26280b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 26290b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 263081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, 263181ad6265SDimitry Andric __m128i __b) { 26320b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 26330b57cec5SDimitry Andric } 26340b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 26350b57cec5SDimitry Andric /// 26360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26370b57cec5SDimitry Andric /// 26380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 
26390b57cec5SDimitry Andric /// 26400b57cec5SDimitry Andric /// \param __a 26410b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26420b57cec5SDimitry Andric /// \param __b 26430b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 26450b57cec5SDimitry Andric /// in both operands. 264681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, 264781ad6265SDimitry Andric __m128i __b) { 26480b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 26490b57cec5SDimitry Andric } 26500b57cec5SDimitry Andric 26510b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 26520b57cec5SDimitry Andric /// 26530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26540b57cec5SDimitry Andric /// 26550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 26560b57cec5SDimitry Andric /// 26570b57cec5SDimitry Andric /// \param __a 26580b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26590b57cec5SDimitry Andric /// \param __b 26600b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26610b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 26620b57cec5SDimitry Andric /// values in both operands. 266381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, 266481ad6265SDimitry Andric __m128i __b) { 26650b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 26660b57cec5SDimitry Andric } 26670b57cec5SDimitry Andric 26680b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 26690b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_slli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm)                                                 \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a),          \
                                                (int)(imm)))

/// Alternate spelling of the byte-wise left shift; expands to exactly the
///    same builtin call as the \c _mm_slli_si128 macro.
#define _mm_bslli_si128(a, imm)                                                \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a),          \
                                                (int)(imm)))

/// Left-shifts each 16-bit value in the 128-bit integer vector operand
///    by the specified number of bits. Low-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector containing the source operand.
27020b57cec5SDimitry Andric /// \param __count 27030b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27040b57cec5SDimitry Andric /// in operand \a __a. 27050b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 270681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, 270781ad6265SDimitry Andric int __count) { 27080b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 27090b57cec5SDimitry Andric } 27100b57cec5SDimitry Andric 27110b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 27120b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27130b57cec5SDimitry Andric /// 27140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27150b57cec5SDimitry Andric /// 27160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 27170b57cec5SDimitry Andric /// 27180b57cec5SDimitry Andric /// \param __a 27190b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27200b57cec5SDimitry Andric /// \param __count 27210b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27220b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27230b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 272481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, 272581ad6265SDimitry Andric __m128i __count) { 27260b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 27270b57cec5SDimitry Andric } 27280b57cec5SDimitry Andric 27290b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27300b57cec5SDimitry Andric /// by the specified number of bits. 
Low-order bits are cleared. 27310b57cec5SDimitry Andric /// 27320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27330b57cec5SDimitry Andric /// 27340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 27350b57cec5SDimitry Andric /// 27360b57cec5SDimitry Andric /// \param __a 27370b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27380b57cec5SDimitry Andric /// \param __count 27390b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27400b57cec5SDimitry Andric /// in operand \a __a. 27410b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 274281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, 274381ad6265SDimitry Andric int __count) { 27440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 27450b57cec5SDimitry Andric } 27460b57cec5SDimitry Andric 27470b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27480b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27490b57cec5SDimitry Andric /// 27500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27510b57cec5SDimitry Andric /// 27520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 27530b57cec5SDimitry Andric /// 27540b57cec5SDimitry Andric /// \param __a 27550b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27560b57cec5SDimitry Andric /// \param __count 27570b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27580b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27590b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
276081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, 276181ad6265SDimitry Andric __m128i __count) { 27620b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 27630b57cec5SDimitry Andric } 27640b57cec5SDimitry Andric 27650b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 27660b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27670b57cec5SDimitry Andric /// 27680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27690b57cec5SDimitry Andric /// 27700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 27710b57cec5SDimitry Andric /// 27720b57cec5SDimitry Andric /// \param __a 27730b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27740b57cec5SDimitry Andric /// \param __count 27750b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27760b57cec5SDimitry Andric /// in operand \a __a. 27770b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 277881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, 277981ad6265SDimitry Andric int __count) { 27800b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 27810b57cec5SDimitry Andric } 27820b57cec5SDimitry Andric 27830b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 27840b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27850b57cec5SDimitry Andric /// 27860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27870b57cec5SDimitry Andric /// 27880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 
27890b57cec5SDimitry Andric /// 27900b57cec5SDimitry Andric /// \param __a 27910b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27920b57cec5SDimitry Andric /// \param __count 27930b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27940b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 279681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, 279781ad6265SDimitry Andric __m128i __count) { 27980b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 27990b57cec5SDimitry Andric } 28000b57cec5SDimitry Andric 28010b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 28020b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28030b57cec5SDimitry Andric /// bit of the initial value. 28040b57cec5SDimitry Andric /// 28050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28060b57cec5SDimitry Andric /// 28070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 28080b57cec5SDimitry Andric /// 28090b57cec5SDimitry Andric /// \param __a 28100b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28110b57cec5SDimitry Andric /// \param __count 28120b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 28130b57cec5SDimitry Andric /// in operand \a __a. 28140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
281581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, 281681ad6265SDimitry Andric int __count) { 28170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 28180b57cec5SDimitry Andric } 28190b57cec5SDimitry Andric 28200b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 28210b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28220b57cec5SDimitry Andric /// bit of the initial value. 28230b57cec5SDimitry Andric /// 28240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28250b57cec5SDimitry Andric /// 28260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 28270b57cec5SDimitry Andric /// 28280b57cec5SDimitry Andric /// \param __a 28290b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28300b57cec5SDimitry Andric /// \param __count 28310b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28320b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 28330b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 283481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, 283581ad6265SDimitry Andric __m128i __count) { 28360b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 28370b57cec5SDimitry Andric } 28380b57cec5SDimitry Andric 28390b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 28400b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28410b57cec5SDimitry Andric /// bit of the initial value. 
28420b57cec5SDimitry Andric /// 28430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28440b57cec5SDimitry Andric /// 28450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 28460b57cec5SDimitry Andric /// 28470b57cec5SDimitry Andric /// \param __a 28480b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28490b57cec5SDimitry Andric /// \param __count 28500b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 28510b57cec5SDimitry Andric /// in operand \a __a. 28520b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 285381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, 285481ad6265SDimitry Andric int __count) { 28550b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 28560b57cec5SDimitry Andric } 28570b57cec5SDimitry Andric 28580b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 28590b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28600b57cec5SDimitry Andric /// bit of the initial value. 28610b57cec5SDimitry Andric /// 28620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28630b57cec5SDimitry Andric /// 28640b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 28650b57cec5SDimitry Andric /// 28660b57cec5SDimitry Andric /// \param __a 28670b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28680b57cec5SDimitry Andric /// \param __count 28690b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28700b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 28710b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
287281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, 287381ad6265SDimitry Andric __m128i __count) { 28740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 28750b57cec5SDimitry Andric } 28760b57cec5SDimitry Andric 28770b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 28780b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 28790b57cec5SDimitry Andric /// 28800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28810b57cec5SDimitry Andric /// 28820b57cec5SDimitry Andric /// \code 28830b57cec5SDimitry Andric /// __m128i _mm_srli_si128(__m128i a, const int imm); 28840b57cec5SDimitry Andric /// \endcode 28850b57cec5SDimitry Andric /// 28860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction. 28870b57cec5SDimitry Andric /// 28880b57cec5SDimitry Andric /// \param a 28890b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28900b57cec5SDimitry Andric /// \param imm 28910b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to right-shift operand 28920b57cec5SDimitry Andric /// \a a. 28930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted value. 28940b57cec5SDimitry Andric #define _mm_srli_si128(a, imm) \ 289581ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 289681ad6265SDimitry Andric (int)(imm))) 28970b57cec5SDimitry Andric 28980b57cec5SDimitry Andric #define _mm_bsrli_si128(a, imm) \ 289981ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 290081ad6265SDimitry Andric (int)(imm))) 29010b57cec5SDimitry Andric 29020b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 29030b57cec5SDimitry Andric /// operand by the specified number of bits. 
High-order bits are cleared. 29040b57cec5SDimitry Andric /// 29050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29060b57cec5SDimitry Andric /// 29070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 29080b57cec5SDimitry Andric /// 29090b57cec5SDimitry Andric /// \param __a 29100b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29110b57cec5SDimitry Andric /// \param __count 29120b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29130b57cec5SDimitry Andric /// in operand \a __a. 29140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 291581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, 291681ad6265SDimitry Andric int __count) { 29170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 29180b57cec5SDimitry Andric } 29190b57cec5SDimitry Andric 29200b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 29210b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29220b57cec5SDimitry Andric /// 29230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29240b57cec5SDimitry Andric /// 29250b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 29260b57cec5SDimitry Andric /// 29270b57cec5SDimitry Andric /// \param __a 29280b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29290b57cec5SDimitry Andric /// \param __count 29300b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29310b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29320b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
293381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, 293481ad6265SDimitry Andric __m128i __count) { 29350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 29360b57cec5SDimitry Andric } 29370b57cec5SDimitry Andric 29380b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 29390b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29400b57cec5SDimitry Andric /// 29410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29420b57cec5SDimitry Andric /// 29430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 29440b57cec5SDimitry Andric /// 29450b57cec5SDimitry Andric /// \param __a 29460b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29470b57cec5SDimitry Andric /// \param __count 29480b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29490b57cec5SDimitry Andric /// in operand \a __a. 29500b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 295181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, 295281ad6265SDimitry Andric int __count) { 29530b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 29540b57cec5SDimitry Andric } 29550b57cec5SDimitry Andric 29560b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 29570b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29580b57cec5SDimitry Andric /// 29590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29600b57cec5SDimitry Andric /// 29610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 
29620b57cec5SDimitry Andric /// 29630b57cec5SDimitry Andric /// \param __a 29640b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29650b57cec5SDimitry Andric /// \param __count 29660b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29670b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29680b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 296981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, 297081ad6265SDimitry Andric __m128i __count) { 29710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 29720b57cec5SDimitry Andric } 29730b57cec5SDimitry Andric 29740b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 29750b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29760b57cec5SDimitry Andric /// 29770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29780b57cec5SDimitry Andric /// 29790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 29800b57cec5SDimitry Andric /// 29810b57cec5SDimitry Andric /// \param __a 29820b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29830b57cec5SDimitry Andric /// \param __count 29840b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29850b57cec5SDimitry Andric /// in operand \a __a. 29860b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
298781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, 298881ad6265SDimitry Andric int __count) { 29890b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 29900b57cec5SDimitry Andric } 29910b57cec5SDimitry Andric 29920b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 29930b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29940b57cec5SDimitry Andric /// 29950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29960b57cec5SDimitry Andric /// 29970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 29980b57cec5SDimitry Andric /// 29990b57cec5SDimitry Andric /// \param __a 30000b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30010b57cec5SDimitry Andric /// \param __count 30020b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30030b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30040b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 300581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, 300681ad6265SDimitry Andric __m128i __count) { 30070b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 30080b57cec5SDimitry Andric } 30090b57cec5SDimitry Andric 30100b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 30110b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF 30120b57cec5SDimitry Andric /// for true. 30130b57cec5SDimitry Andric /// 30140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30150b57cec5SDimitry Andric /// 30160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 
30170b57cec5SDimitry Andric /// 30180b57cec5SDimitry Andric /// \param __a 30190b57cec5SDimitry Andric /// A 128-bit integer vector. 30200b57cec5SDimitry Andric /// \param __b 30210b57cec5SDimitry Andric /// A 128-bit integer vector. 30220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 302381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, 302481ad6265SDimitry Andric __m128i __b) { 30250b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 30260b57cec5SDimitry Andric } 30270b57cec5SDimitry Andric 30280b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 30290b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 30300b57cec5SDimitry Andric /// 0xFFFF for true. 30310b57cec5SDimitry Andric /// 30320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30330b57cec5SDimitry Andric /// 30340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 30350b57cec5SDimitry Andric /// 30360b57cec5SDimitry Andric /// \param __a 30370b57cec5SDimitry Andric /// A 128-bit integer vector. 30380b57cec5SDimitry Andric /// \param __b 30390b57cec5SDimitry Andric /// A 128-bit integer vector. 30400b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 304181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, 304281ad6265SDimitry Andric __m128i __b) { 30430b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 30440b57cec5SDimitry Andric } 30450b57cec5SDimitry Andric 30460b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 30470b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 30480b57cec5SDimitry Andric /// 0xFFFFFFFF for true. 
30490b57cec5SDimitry Andric /// 30500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30510b57cec5SDimitry Andric /// 30520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 30530b57cec5SDimitry Andric /// 30540b57cec5SDimitry Andric /// \param __a 30550b57cec5SDimitry Andric /// A 128-bit integer vector. 30560b57cec5SDimitry Andric /// \param __b 30570b57cec5SDimitry Andric /// A 128-bit integer vector. 30580b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 305981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, 306081ad6265SDimitry Andric __m128i __b) { 30610b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 30620b57cec5SDimitry Andric } 30630b57cec5SDimitry Andric 30640b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 30650b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 30660b57cec5SDimitry Andric /// greater than those in the second operand. Each comparison yields 0x0 for 30670b57cec5SDimitry Andric /// false, 0xFF for true. 30680b57cec5SDimitry Andric /// 30690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30700b57cec5SDimitry Andric /// 30710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 30720b57cec5SDimitry Andric /// 30730b57cec5SDimitry Andric /// \param __a 30740b57cec5SDimitry Andric /// A 128-bit integer vector. 30750b57cec5SDimitry Andric /// \param __b 30760b57cec5SDimitry Andric /// A 128-bit integer vector. 30770b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
307881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, 307981ad6265SDimitry Andric __m128i __b) { 30800b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 30810b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. */ 30820b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 30830b57cec5SDimitry Andric } 30840b57cec5SDimitry Andric 30850b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 30860b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 30870b57cec5SDimitry Andric /// are greater than those in the second operand. 30880b57cec5SDimitry Andric /// 30890b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 30900b57cec5SDimitry Andric /// 30910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30920b57cec5SDimitry Andric /// 30930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 30940b57cec5SDimitry Andric /// 30950b57cec5SDimitry Andric /// \param __a 30960b57cec5SDimitry Andric /// A 128-bit integer vector. 30970b57cec5SDimitry Andric /// \param __b 30980b57cec5SDimitry Andric /// A 128-bit integer vector. 30990b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 310081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, 310181ad6265SDimitry Andric __m128i __b) { 31020b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 31030b57cec5SDimitry Andric } 31040b57cec5SDimitry Andric 31050b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 31060b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31070b57cec5SDimitry Andric /// are greater than those in the second operand. 
31080b57cec5SDimitry Andric /// 31090b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 31100b57cec5SDimitry Andric /// 31110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31120b57cec5SDimitry Andric /// 31130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 31140b57cec5SDimitry Andric /// 31150b57cec5SDimitry Andric /// \param __a 31160b57cec5SDimitry Andric /// A 128-bit integer vector. 31170b57cec5SDimitry Andric /// \param __b 31180b57cec5SDimitry Andric /// A 128-bit integer vector. 31190b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 312081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, 312181ad6265SDimitry Andric __m128i __b) { 31220b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 31230b57cec5SDimitry Andric } 31240b57cec5SDimitry Andric 31250b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 31260b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 31270b57cec5SDimitry Andric /// than those in the second operand. 31280b57cec5SDimitry Andric /// 31290b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFF for true. 31300b57cec5SDimitry Andric /// 31310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31320b57cec5SDimitry Andric /// 31330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 31340b57cec5SDimitry Andric /// 31350b57cec5SDimitry Andric /// \param __a 31360b57cec5SDimitry Andric /// A 128-bit integer vector. 31370b57cec5SDimitry Andric /// \param __b 31380b57cec5SDimitry Andric /// A 128-bit integer vector. 31390b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
314081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, 314181ad6265SDimitry Andric __m128i __b) { 31420b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 31430b57cec5SDimitry Andric } 31440b57cec5SDimitry Andric 31450b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 31460b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31470b57cec5SDimitry Andric /// are less than those in the second operand. 31480b57cec5SDimitry Andric /// 31490b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 31500b57cec5SDimitry Andric /// 31510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31520b57cec5SDimitry Andric /// 31530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 31540b57cec5SDimitry Andric /// 31550b57cec5SDimitry Andric /// \param __a 31560b57cec5SDimitry Andric /// A 128-bit integer vector. 31570b57cec5SDimitry Andric /// \param __b 31580b57cec5SDimitry Andric /// A 128-bit integer vector. 31590b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 316081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, 316181ad6265SDimitry Andric __m128i __b) { 31620b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 31630b57cec5SDimitry Andric } 31640b57cec5SDimitry Andric 31650b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 31660b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31670b57cec5SDimitry Andric /// are less than those in the second operand. 31680b57cec5SDimitry Andric /// 31690b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 
31700b57cec5SDimitry Andric /// 31710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31720b57cec5SDimitry Andric /// 31730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 31740b57cec5SDimitry Andric /// 31750b57cec5SDimitry Andric /// \param __a 31760b57cec5SDimitry Andric /// A 128-bit integer vector. 31770b57cec5SDimitry Andric /// \param __b 31780b57cec5SDimitry Andric /// A 128-bit integer vector. 31790b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 318081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, 318181ad6265SDimitry Andric __m128i __b) { 31820b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 31830b57cec5SDimitry Andric } 31840b57cec5SDimitry Andric 31850b57cec5SDimitry Andric #ifdef __x86_64__ 31860b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 31870b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 31880b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 31890b57cec5SDimitry Andric /// the upper element of the first operand. 31900b57cec5SDimitry Andric /// 31910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31920b57cec5SDimitry Andric /// 31930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 31940b57cec5SDimitry Andric /// 31950b57cec5SDimitry Andric /// \param __a 31960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 31970b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 31980b57cec5SDimitry Andric /// \param __b 31990b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 
32000b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 32010b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 32020b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 320381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, 320481ad6265SDimitry Andric long long __b) { 32050b57cec5SDimitry Andric __a[0] = __b; 32060b57cec5SDimitry Andric return __a; 32070b57cec5SDimitry Andric } 32080b57cec5SDimitry Andric 32090b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32100b57cec5SDimitry Andric /// 64-bit signed integer value, according to the current rounding mode. 32110b57cec5SDimitry Andric /// 32120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32130b57cec5SDimitry Andric /// 32140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 32150b57cec5SDimitry Andric /// 32160b57cec5SDimitry Andric /// \param __a 32170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32180b57cec5SDimitry Andric /// conversion. 32190b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 322081ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { 32210b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 32220b57cec5SDimitry Andric } 32230b57cec5SDimitry Andric 32240b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32250b57cec5SDimitry Andric /// 64-bit signed integer value, truncating the result when it is inexact. 
32260b57cec5SDimitry Andric /// 32270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32280b57cec5SDimitry Andric /// 32290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 32300b57cec5SDimitry Andric /// instruction. 32310b57cec5SDimitry Andric /// 32320b57cec5SDimitry Andric /// \param __a 32330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32340b57cec5SDimitry Andric /// conversion. 32350b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 323681ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { 32370b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 32380b57cec5SDimitry Andric } 32390b57cec5SDimitry Andric #endif 32400b57cec5SDimitry Andric 32410b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 32420b57cec5SDimitry Andric /// 32430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32440b57cec5SDimitry Andric /// 32450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 32460b57cec5SDimitry Andric /// 32470b57cec5SDimitry Andric /// \param __a 32480b57cec5SDimitry Andric /// A 128-bit integer vector. 32490b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 325081ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { 32510b57cec5SDimitry Andric return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); 32520b57cec5SDimitry Andric } 32530b57cec5SDimitry Andric 32540b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 32550b57cec5SDimitry Andric /// 32560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32570b57cec5SDimitry Andric /// 32580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 
32590b57cec5SDimitry Andric /// 32600b57cec5SDimitry Andric /// \param __a 32610b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 32620b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 32630b57cec5SDimitry Andric /// values. 326481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { 32650b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 32660b57cec5SDimitry Andric } 32670b57cec5SDimitry Andric 32680b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32], 32690b57cec5SDimitry Andric /// truncating the result when it is inexact. 32700b57cec5SDimitry Andric /// 32710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32720b57cec5SDimitry Andric /// 32730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 32740b57cec5SDimitry Andric /// instruction. 32750b57cec5SDimitry Andric /// 32760b57cec5SDimitry Andric /// \param __a 32770b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 32780b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 327981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { 32800b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 32810b57cec5SDimitry Andric } 32820b57cec5SDimitry Andric 32830b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 32840b57cec5SDimitry Andric /// operand and the remaining elements are zero. 32850b57cec5SDimitry Andric /// 32860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32870b57cec5SDimitry Andric /// 32880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 32890b57cec5SDimitry Andric /// 32900b57cec5SDimitry Andric /// \param __a 32910b57cec5SDimitry Andric /// A 32-bit signed integer operand. 
32920b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 329381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { 32940b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; 32950b57cec5SDimitry Andric } 32960b57cec5SDimitry Andric 32970b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 32980b57cec5SDimitry Andric /// operand and the upper element is zero. 32990b57cec5SDimitry Andric /// 33000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33010b57cec5SDimitry Andric /// 330281ad6265SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction 330381ad6265SDimitry Andric /// in 64-bit mode. 33040b57cec5SDimitry Andric /// 33050b57cec5SDimitry Andric /// \param __a 33060b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 33070b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 330881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { 33090b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__a, 0}; 33100b57cec5SDimitry Andric } 33110b57cec5SDimitry Andric 33120b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 33130b57cec5SDimitry Andric /// 32-bit signed integer value. 33140b57cec5SDimitry Andric /// 33150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33160b57cec5SDimitry Andric /// 33170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 33180b57cec5SDimitry Andric /// 33190b57cec5SDimitry Andric /// \param __a 33200b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 33210b57cec5SDimitry Andric /// destination. 33220b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 
332381ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { 33240b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 33250b57cec5SDimitry Andric return __b[0]; 33260b57cec5SDimitry Andric } 33270b57cec5SDimitry Andric 33280b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 33290b57cec5SDimitry Andric /// 64-bit signed integer value. 33300b57cec5SDimitry Andric /// 33310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33320b57cec5SDimitry Andric /// 33330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 33340b57cec5SDimitry Andric /// 33350b57cec5SDimitry Andric /// \param __a 33360b57cec5SDimitry Andric /// A vector of [2 x i64]. The least significant 64 bits are moved to the 33370b57cec5SDimitry Andric /// destination. 33380b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 333981ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { 33400b57cec5SDimitry Andric return __a[0]; 33410b57cec5SDimitry Andric } 33420b57cec5SDimitry Andric 33430b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 33440b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 33450b57cec5SDimitry Andric /// 33460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33470b57cec5SDimitry Andric /// 33480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 33490b57cec5SDimitry Andric /// 33500b57cec5SDimitry Andric /// \param __p 33510b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 33520b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 
33530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 335481ad6265SDimitry Andric _mm_load_si128(__m128i const *__p) { 33550b57cec5SDimitry Andric return *__p; 33560b57cec5SDimitry Andric } 33570b57cec5SDimitry Andric 33580b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 33590b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 33600b57cec5SDimitry Andric /// 33610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33620b57cec5SDimitry Andric /// 33630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 33640b57cec5SDimitry Andric /// 33650b57cec5SDimitry Andric /// \param __p 33660b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 33670b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 33680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 336981ad6265SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) { 33700b57cec5SDimitry Andric struct __loadu_si128 { 33710b57cec5SDimitry Andric __m128i_u __v; 33720b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3373480093f4SDimitry Andric return ((const struct __loadu_si128 *)__p)->__v; 33740b57cec5SDimitry Andric } 33750b57cec5SDimitry Andric 33760b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 33770b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 33780b57cec5SDimitry Andric /// 33790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33800b57cec5SDimitry Andric /// 33810b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 33820b57cec5SDimitry Andric /// 33830b57cec5SDimitry Andric /// \param __p 33840b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 33850b57cec5SDimitry Andric /// the destination. 
33860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 33870b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 33880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 338981ad6265SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) { 33900b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 33910b57cec5SDimitry Andric long long __u; 33920b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 339381ad6265SDimitry Andric return __extension__(__m128i){ 339481ad6265SDimitry Andric ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; 33950b57cec5SDimitry Andric } 33960b57cec5SDimitry Andric 33970b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 33980b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 33990b57cec5SDimitry Andric /// argument is required but the value is not actually used. 34000b57cec5SDimitry Andric /// 34010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34020b57cec5SDimitry Andric /// 34030b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 34040b57cec5SDimitry Andric /// 34050b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 340681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { 34070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 34080b57cec5SDimitry Andric } 34090b57cec5SDimitry Andric 34100b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 34110b57cec5SDimitry Andric /// the specified 64-bit integer values. 
34120b57cec5SDimitry Andric /// 34130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34140b57cec5SDimitry Andric /// 34150b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34160b57cec5SDimitry Andric /// instruction. 34170b57cec5SDimitry Andric /// 34180b57cec5SDimitry Andric /// \param __q1 34190b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 34200b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34210b57cec5SDimitry Andric /// \param __q0 34220b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 34230b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34240b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 34250b57cec5SDimitry Andric /// provided in the operands. 342681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, 342781ad6265SDimitry Andric long long __q0) { 34280b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__q0, __q1}; 34290b57cec5SDimitry Andric } 34300b57cec5SDimitry Andric 34310b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 34320b57cec5SDimitry Andric /// the specified 64-bit integer values. 34330b57cec5SDimitry Andric /// 34340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34350b57cec5SDimitry Andric /// 34360b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34370b57cec5SDimitry Andric /// instruction. 34380b57cec5SDimitry Andric /// 34390b57cec5SDimitry Andric /// \param __q1 34400b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 34410b57cec5SDimitry Andric /// destination vector of [2 x i64]. 
34420b57cec5SDimitry Andric /// \param __q0 34430b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 34440b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34450b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 34460b57cec5SDimitry Andric /// provided in the operands. 344781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, 344881ad6265SDimitry Andric __m64 __q0) { 34490b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 34500b57cec5SDimitry Andric } 34510b57cec5SDimitry Andric 34520b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 34530b57cec5SDimitry Andric /// the specified 32-bit integer values. 34540b57cec5SDimitry Andric /// 34550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34560b57cec5SDimitry Andric /// 34570b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34580b57cec5SDimitry Andric /// instruction. 34590b57cec5SDimitry Andric /// 34600b57cec5SDimitry Andric /// \param __i3 34610b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 34620b57cec5SDimitry Andric /// destination vector. 34630b57cec5SDimitry Andric /// \param __i2 34640b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 34650b57cec5SDimitry Andric /// vector. 34660b57cec5SDimitry Andric /// \param __i1 34670b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 34680b57cec5SDimitry Andric /// vector. 34690b57cec5SDimitry Andric /// \param __i0 34700b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 34710b57cec5SDimitry Andric /// vector. 
34720b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 34730b57cec5SDimitry Andric /// provided in the operands. 347481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, 347581ad6265SDimitry Andric int __i1, int __i0) { 34760b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; 34770b57cec5SDimitry Andric } 34780b57cec5SDimitry Andric 34790b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 34800b57cec5SDimitry Andric /// the specified 16-bit integer values. 34810b57cec5SDimitry Andric /// 34820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34830b57cec5SDimitry Andric /// 34840b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34850b57cec5SDimitry Andric /// instruction. 34860b57cec5SDimitry Andric /// 34870b57cec5SDimitry Andric /// \param __w7 34880b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [127:112] of the 34890b57cec5SDimitry Andric /// destination vector. 34900b57cec5SDimitry Andric /// \param __w6 34910b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [111:96] of the 34920b57cec5SDimitry Andric /// destination vector. 34930b57cec5SDimitry Andric /// \param __w5 34940b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [95:80] of the destination 34950b57cec5SDimitry Andric /// vector. 34960b57cec5SDimitry Andric /// \param __w4 34970b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [79:64] of the destination 34980b57cec5SDimitry Andric /// vector. 34990b57cec5SDimitry Andric /// \param __w3 35000b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the destination 35010b57cec5SDimitry Andric /// vector. 
35020b57cec5SDimitry Andric /// \param __w2 35030b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the destination 35040b57cec5SDimitry Andric /// vector. 35050b57cec5SDimitry Andric /// \param __w1 35060b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the destination 35070b57cec5SDimitry Andric /// vector. 35080b57cec5SDimitry Andric /// \param __w0 35090b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the destination 35100b57cec5SDimitry Andric /// vector. 35110b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values 35120b57cec5SDimitry Andric /// provided in the operands. 35130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 351481ad6265SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, 351581ad6265SDimitry Andric short __w2, short __w1, short __w0) { 351681ad6265SDimitry Andric return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, 351781ad6265SDimitry Andric __w4, __w5, __w6, __w7}; 35180b57cec5SDimitry Andric } 35190b57cec5SDimitry Andric 35200b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 35210b57cec5SDimitry Andric /// the specified 8-bit integer values. 35220b57cec5SDimitry Andric /// 35230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35240b57cec5SDimitry Andric /// 35250b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35260b57cec5SDimitry Andric /// instruction. 35270b57cec5SDimitry Andric /// 35280b57cec5SDimitry Andric /// \param __b15 35290b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 35300b57cec5SDimitry Andric /// \param __b14 35310b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 
35320b57cec5SDimitry Andric /// \param __b13 35330b57cec5SDimitry Andric /// Initializes bits [111:104] of the destination vector. 35340b57cec5SDimitry Andric /// \param __b12 35350b57cec5SDimitry Andric /// Initializes bits [103:96] of the destination vector. 35360b57cec5SDimitry Andric /// \param __b11 35370b57cec5SDimitry Andric /// Initializes bits [95:88] of the destination vector. 35380b57cec5SDimitry Andric /// \param __b10 35390b57cec5SDimitry Andric /// Initializes bits [87:80] of the destination vector. 35400b57cec5SDimitry Andric /// \param __b9 35410b57cec5SDimitry Andric /// Initializes bits [79:72] of the destination vector. 35420b57cec5SDimitry Andric /// \param __b8 35430b57cec5SDimitry Andric /// Initializes bits [71:64] of the destination vector. 35440b57cec5SDimitry Andric /// \param __b7 35450b57cec5SDimitry Andric /// Initializes bits [63:56] of the destination vector. 35460b57cec5SDimitry Andric /// \param __b6 35470b57cec5SDimitry Andric /// Initializes bits [55:48] of the destination vector. 35480b57cec5SDimitry Andric /// \param __b5 35490b57cec5SDimitry Andric /// Initializes bits [47:40] of the destination vector. 35500b57cec5SDimitry Andric /// \param __b4 35510b57cec5SDimitry Andric /// Initializes bits [39:32] of the destination vector. 35520b57cec5SDimitry Andric /// \param __b3 35530b57cec5SDimitry Andric /// Initializes bits [31:24] of the destination vector. 35540b57cec5SDimitry Andric /// \param __b2 35550b57cec5SDimitry Andric /// Initializes bits [23:16] of the destination vector. 35560b57cec5SDimitry Andric /// \param __b1 35570b57cec5SDimitry Andric /// Initializes bits [15:8] of the destination vector. 35580b57cec5SDimitry Andric /// \param __b0 35590b57cec5SDimitry Andric /// Initializes bits [7:0] of the destination vector. 35600b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values 35610b57cec5SDimitry Andric /// provided in the operands. 
35620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 356381ad6265SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, 356481ad6265SDimitry Andric char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, 356581ad6265SDimitry Andric char __b4, char __b3, char __b2, char __b1, char __b0) { 356681ad6265SDimitry Andric return __extension__(__m128i)(__v16qi){ 356781ad6265SDimitry Andric __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, 356881ad6265SDimitry Andric __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; 35690b57cec5SDimitry Andric } 35700b57cec5SDimitry Andric 35710b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 35720b57cec5SDimitry Andric /// specified 64-bit integer value. 35730b57cec5SDimitry Andric /// 35740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35750b57cec5SDimitry Andric /// 35760b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35770b57cec5SDimitry Andric /// instruction. 35780b57cec5SDimitry Andric /// 35790b57cec5SDimitry Andric /// \param __q 35800b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 35810b57cec5SDimitry Andric /// vector. 35820b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 35830b57cec5SDimitry Andric /// elements containing the value provided in the operand. 358481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { 35850b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 35860b57cec5SDimitry Andric } 35870b57cec5SDimitry Andric 35880b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 35890b57cec5SDimitry Andric /// specified 64-bit value. 
35900b57cec5SDimitry Andric /// 35910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35920b57cec5SDimitry Andric /// 35930b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35940b57cec5SDimitry Andric /// instruction. 35950b57cec5SDimitry Andric /// 35960b57cec5SDimitry Andric /// \param __q 35970b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 35980b57cec5SDimitry Andric /// vector. 35990b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 36000b57cec5SDimitry Andric /// containing the value provided in the operand. 360181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { 36020b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 36030b57cec5SDimitry Andric } 36040b57cec5SDimitry Andric 36050b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 36060b57cec5SDimitry Andric /// specified 32-bit value. 36070b57cec5SDimitry Andric /// 36080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36090b57cec5SDimitry Andric /// 36100b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36110b57cec5SDimitry Andric /// instruction. 36120b57cec5SDimitry Andric /// 36130b57cec5SDimitry Andric /// \param __i 36140b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 36150b57cec5SDimitry Andric /// vector. 36160b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 36170b57cec5SDimitry Andric /// containing the value provided in the operand. 
361881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { 36190b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 36200b57cec5SDimitry Andric } 36210b57cec5SDimitry Andric 36220b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 36230b57cec5SDimitry Andric /// specified 16-bit value. 36240b57cec5SDimitry Andric /// 36250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36260b57cec5SDimitry Andric /// 36270b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36280b57cec5SDimitry Andric /// instruction. 36290b57cec5SDimitry Andric /// 36300b57cec5SDimitry Andric /// \param __w 36310b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 36320b57cec5SDimitry Andric /// vector. 36330b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 36340b57cec5SDimitry Andric /// containing the value provided in the operand. 363581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { 36360b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 36370b57cec5SDimitry Andric } 36380b57cec5SDimitry Andric 36390b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 36400b57cec5SDimitry Andric /// specified 8-bit value. 36410b57cec5SDimitry Andric /// 36420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36430b57cec5SDimitry Andric /// 36440b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36450b57cec5SDimitry Andric /// instruction. 36460b57cec5SDimitry Andric /// 36470b57cec5SDimitry Andric /// \param __b 36480b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 36490b57cec5SDimitry Andric /// vector. 
36500b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 36510b57cec5SDimitry Andric /// containing the value provided in the operand. 365281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { 365381ad6265SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 365481ad6265SDimitry Andric __b, __b, __b, __b, __b); 36550b57cec5SDimitry Andric } 36560b57cec5SDimitry Andric 36570b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 36580b57cec5SDimitry Andric /// with the specified 64-bit integral values. 36590b57cec5SDimitry Andric /// 36600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36610b57cec5SDimitry Andric /// 36620b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 36630b57cec5SDimitry Andric /// 36640b57cec5SDimitry Andric /// \param __q0 36650b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 36660b57cec5SDimitry Andric /// result. 36670b57cec5SDimitry Andric /// \param __q1 36680b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 36690b57cec5SDimitry Andric /// result. 36700b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 367181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, 367281ad6265SDimitry Andric __m64 __q1) { 36730b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 36740b57cec5SDimitry Andric } 36750b57cec5SDimitry Andric 36760b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 36770b57cec5SDimitry Andric /// with the specified 32-bit integral values. 
36780b57cec5SDimitry Andric /// 36790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36800b57cec5SDimitry Andric /// 36810b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36820b57cec5SDimitry Andric /// instruction. 36830b57cec5SDimitry Andric /// 36840b57cec5SDimitry Andric /// \param __i0 36850b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 36860b57cec5SDimitry Andric /// \param __i1 36870b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 36880b57cec5SDimitry Andric /// \param __i2 36890b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 36900b57cec5SDimitry Andric /// \param __i3 36910b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 36920b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 369381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, 369481ad6265SDimitry Andric int __i2, 369581ad6265SDimitry Andric int __i3) { 36960b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 36970b57cec5SDimitry Andric } 36980b57cec5SDimitry Andric 36990b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37000b57cec5SDimitry Andric /// with the specified 16-bit integral values. 37010b57cec5SDimitry Andric /// 37020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37030b57cec5SDimitry Andric /// 37040b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37050b57cec5SDimitry Andric /// instruction. 37060b57cec5SDimitry Andric /// 37070b57cec5SDimitry Andric /// \param __w0 37080b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 
37090b57cec5SDimitry Andric /// \param __w1 37100b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 37110b57cec5SDimitry Andric /// \param __w2 37120b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 37130b57cec5SDimitry Andric /// \param __w3 37140b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 37150b57cec5SDimitry Andric /// \param __w4 37160b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 37170b57cec5SDimitry Andric /// \param __w5 37180b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 37190b57cec5SDimitry Andric /// \param __w6 37200b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 37210b57cec5SDimitry Andric /// \param __w7 37220b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 37230b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 37240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 372581ad6265SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, 372681ad6265SDimitry Andric short __w5, short __w6, short __w7) { 37270b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 37280b57cec5SDimitry Andric } 37290b57cec5SDimitry Andric 37300b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37310b57cec5SDimitry Andric /// with the specified 8-bit integral values. 37320b57cec5SDimitry Andric /// 37330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37340b57cec5SDimitry Andric /// 37350b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37360b57cec5SDimitry Andric /// instruction. 
37370b57cec5SDimitry Andric /// 37380b57cec5SDimitry Andric /// \param __b0 37390b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [7:0] of the result. 37400b57cec5SDimitry Andric /// \param __b1 37410b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [15:8] of the result. 37420b57cec5SDimitry Andric /// \param __b2 37430b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [23:16] of the result. 37440b57cec5SDimitry Andric /// \param __b3 37450b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [31:24] of the result. 37460b57cec5SDimitry Andric /// \param __b4 37470b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [39:32] of the result. 37480b57cec5SDimitry Andric /// \param __b5 37490b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [47:40] of the result. 37500b57cec5SDimitry Andric /// \param __b6 37510b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [55:48] of the result. 37520b57cec5SDimitry Andric /// \param __b7 37530b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [63:56] of the result. 37540b57cec5SDimitry Andric /// \param __b8 37550b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [71:64] of the result. 37560b57cec5SDimitry Andric /// \param __b9 37570b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [79:72] of the result. 37580b57cec5SDimitry Andric /// \param __b10 37590b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [87:80] of the result. 37600b57cec5SDimitry Andric /// \param __b11 37610b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [95:88] of the result. 37620b57cec5SDimitry Andric /// \param __b12 37630b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [103:96] of the result. 
37640b57cec5SDimitry Andric /// \param __b13 37650b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [111:104] of the result. 37660b57cec5SDimitry Andric /// \param __b14 37670b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [119:112] of the result. 37680b57cec5SDimitry Andric /// \param __b15 37690b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 37700b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 37710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 377281ad6265SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 377381ad6265SDimitry Andric char __b6, char __b7, char __b8, char __b9, char __b10, 377481ad6265SDimitry Andric char __b11, char __b12, char __b13, char __b14, char __b15) { 377581ad6265SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, 377681ad6265SDimitry Andric __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 37770b57cec5SDimitry Andric } 37780b57cec5SDimitry Andric 37790b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 37800b57cec5SDimitry Andric /// 37810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37820b57cec5SDimitry Andric /// 37830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 37840b57cec5SDimitry Andric /// 37850b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 37860b57cec5SDimitry Andric /// zero. 378781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { 37880b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){0LL, 0LL}; 37890b57cec5SDimitry Andric } 37900b57cec5SDimitry Andric 37910b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 37920b57cec5SDimitry Andric /// 128-bit boundary. 
37930b57cec5SDimitry Andric /// 37940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37950b57cec5SDimitry Andric /// 37960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 37970b57cec5SDimitry Andric /// 37980b57cec5SDimitry Andric /// \param __p 37990b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 38000b57cec5SDimitry Andric /// values. 38010b57cec5SDimitry Andric /// \param __b 38020b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 380381ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, 380481ad6265SDimitry Andric __m128i __b) { 38050b57cec5SDimitry Andric *__p = __b; 38060b57cec5SDimitry Andric } 38070b57cec5SDimitry Andric 38080b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 38090b57cec5SDimitry Andric /// 38100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38110b57cec5SDimitry Andric /// 38120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 38130b57cec5SDimitry Andric /// 38140b57cec5SDimitry Andric /// \param __p 38150b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 38160b57cec5SDimitry Andric /// \param __b 38170b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 
381881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, 381981ad6265SDimitry Andric __m128i __b) { 38200b57cec5SDimitry Andric struct __storeu_si128 { 38210b57cec5SDimitry Andric __m128i_u __v; 38220b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38230b57cec5SDimitry Andric ((struct __storeu_si128 *)__p)->__v = __b; 38240b57cec5SDimitry Andric } 38250b57cec5SDimitry Andric 38260b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 38270b57cec5SDimitry Andric /// vector. 38280b57cec5SDimitry Andric /// 38290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38300b57cec5SDimitry Andric /// 38310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 38320b57cec5SDimitry Andric /// 38330b57cec5SDimitry Andric /// \param __p 38340b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 3835e8d8bef9SDimitry Andric /// location does not have to be aligned. 38360b57cec5SDimitry Andric /// \param __b 38370b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 383881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, 383981ad6265SDimitry Andric __m128i __b) { 38400b57cec5SDimitry Andric struct __storeu_si64 { 38410b57cec5SDimitry Andric long long __v; 38420b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38430b57cec5SDimitry Andric ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; 38440b57cec5SDimitry Andric } 38450b57cec5SDimitry Andric 38460b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 38470b57cec5SDimitry Andric /// vector. 
38480b57cec5SDimitry Andric /// 38490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38500b57cec5SDimitry Andric /// 38510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 38520b57cec5SDimitry Andric /// 38530b57cec5SDimitry Andric /// \param __p 38540b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 38550b57cec5SDimitry Andric /// location does not have to be aligned. 38560b57cec5SDimitry Andric /// \param __b 38570b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 385881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, 385981ad6265SDimitry Andric __m128i __b) { 38600b57cec5SDimitry Andric struct __storeu_si32 { 38610b57cec5SDimitry Andric int __v; 38620b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38630b57cec5SDimitry Andric ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; 38640b57cec5SDimitry Andric } 38650b57cec5SDimitry Andric 38660b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 38670b57cec5SDimitry Andric /// vector. 38680b57cec5SDimitry Andric /// 38690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38700b57cec5SDimitry Andric /// 38710b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 38720b57cec5SDimitry Andric /// 38730b57cec5SDimitry Andric /// \param __p 38740b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 38750b57cec5SDimitry Andric /// location does not have to be aligned. 38760b57cec5SDimitry Andric /// \param __b 38770b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
387881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, 387981ad6265SDimitry Andric __m128i __b) { 38800b57cec5SDimitry Andric struct __storeu_si16 { 38810b57cec5SDimitry Andric short __v; 38820b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38830b57cec5SDimitry Andric ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; 38840b57cec5SDimitry Andric } 38850b57cec5SDimitry Andric 38860b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 38870b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 38880b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 38890b57cec5SDimitry Andric /// 38900b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 38910b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 38920b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 38930b57cec5SDimitry Andric /// 38940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38950b57cec5SDimitry Andric /// 38960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 38970b57cec5SDimitry Andric /// instruction. 38980b57cec5SDimitry Andric /// 38990b57cec5SDimitry Andric /// \param __d 39000b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 39010b57cec5SDimitry Andric /// \param __n 39020b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 39030b57cec5SDimitry Andric /// each byte represents the mask bits. 39040b57cec5SDimitry Andric /// \param __p 39050b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 39060b57cec5SDimitry Andric /// values are moved. 
390781ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, 390881ad6265SDimitry Andric __m128i __n, 390981ad6265SDimitry Andric char *__p) { 39100b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 39110b57cec5SDimitry Andric } 39120b57cec5SDimitry Andric 39130b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 39140b57cec5SDimitry Andric /// a memory location. 39150b57cec5SDimitry Andric /// 39160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39170b57cec5SDimitry Andric /// 39180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 39190b57cec5SDimitry Andric /// 39200b57cec5SDimitry Andric /// \param __p 39210b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 39220b57cec5SDimitry Andric /// of the integer vector parameter. 39230b57cec5SDimitry Andric /// \param __a 39240b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 39250b57cec5SDimitry Andric /// value to be stored. 392681ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, 392781ad6265SDimitry Andric __m128i __a) { 39280b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 39290b57cec5SDimitry Andric long long __u; 39300b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 39310b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; 39320b57cec5SDimitry Andric } 39330b57cec5SDimitry Andric 39340b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 39350b57cec5SDimitry Andric /// aligned memory location. 39360b57cec5SDimitry Andric /// 39370b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39380b57cec5SDimitry Andric /// used again soon). 
39390b57cec5SDimitry Andric /// 39400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39410b57cec5SDimitry Andric /// 39420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 39430b57cec5SDimitry Andric /// 39440b57cec5SDimitry Andric /// \param __p 39450b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 39460b57cec5SDimitry Andric /// \param __a 39470b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 394881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, 394981ad6265SDimitry Andric __m128d __a) { 39500b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); 39510b57cec5SDimitry Andric } 39520b57cec5SDimitry Andric 39530b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 39540b57cec5SDimitry Andric /// 39550b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39560b57cec5SDimitry Andric /// used again soon). 39570b57cec5SDimitry Andric /// 39580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39590b57cec5SDimitry Andric /// 39600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 39610b57cec5SDimitry Andric /// 39620b57cec5SDimitry Andric /// \param __p 39630b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 39640b57cec5SDimitry Andric /// \param __a 39650b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 
396681ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, 396781ad6265SDimitry Andric __m128i __a) { 39680b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); 39690b57cec5SDimitry Andric } 39700b57cec5SDimitry Andric 39710b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 39720b57cec5SDimitry Andric /// 39730b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39740b57cec5SDimitry Andric /// used again soon). 39750b57cec5SDimitry Andric /// 39760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39770b57cec5SDimitry Andric /// 39780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 39790b57cec5SDimitry Andric /// 39800b57cec5SDimitry Andric /// \param __p 39810b57cec5SDimitry Andric /// A pointer to the 32-bit memory location used to store the value. 39820b57cec5SDimitry Andric /// \param __a 39830b57cec5SDimitry Andric /// A 32-bit integer containing the value to be stored. 398481ad6265SDimitry Andric static __inline__ void 398581ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 398681ad6265SDimitry Andric _mm_stream_si32(int *__p, int __a) { 39870b57cec5SDimitry Andric __builtin_ia32_movnti(__p, __a); 39880b57cec5SDimitry Andric } 39890b57cec5SDimitry Andric 39900b57cec5SDimitry Andric #ifdef __x86_64__ 39910b57cec5SDimitry Andric /// Stores a 64-bit integer value in the specified memory location. 39920b57cec5SDimitry Andric /// 39930b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39940b57cec5SDimitry Andric /// used again soon). 39950b57cec5SDimitry Andric /// 39960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39970b57cec5SDimitry Andric /// 39980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction. 
39990b57cec5SDimitry Andric /// 40000b57cec5SDimitry Andric /// \param __p 40010b57cec5SDimitry Andric /// A pointer to the 64-bit memory location used to store the value. 40020b57cec5SDimitry Andric /// \param __a 40030b57cec5SDimitry Andric /// A 64-bit integer containing the value to be stored. 400481ad6265SDimitry Andric static __inline__ void 400581ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 400681ad6265SDimitry Andric _mm_stream_si64(long long *__p, long long __a) { 40070b57cec5SDimitry Andric __builtin_ia32_movnti64(__p, __a); 40080b57cec5SDimitry Andric } 40090b57cec5SDimitry Andric #endif 40100b57cec5SDimitry Andric 40110b57cec5SDimitry Andric #if defined(__cplusplus) 40120b57cec5SDimitry Andric extern "C" { 40130b57cec5SDimitry Andric #endif 40140b57cec5SDimitry Andric 40150b57cec5SDimitry Andric /// The cache line containing \a __p is flushed and invalidated from all 40160b57cec5SDimitry Andric /// caches in the coherency domain. 40170b57cec5SDimitry Andric /// 40180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40190b57cec5SDimitry Andric /// 40200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CLFLUSH </c> instruction. 40210b57cec5SDimitry Andric /// 40220b57cec5SDimitry Andric /// \param __p 40230b57cec5SDimitry Andric /// A pointer to the memory location used to identify the cache line to be 40240b57cec5SDimitry Andric /// flushed. 40250b57cec5SDimitry Andric void _mm_clflush(void const *__p); 40260b57cec5SDimitry Andric 40270b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load 40280b57cec5SDimitry Andric /// instructions preceding this instruction and load instructions following 40290b57cec5SDimitry Andric /// this instruction, ensuring the system completes all previous loads before 40300b57cec5SDimitry Andric /// executing subsequent loads. 
40310b57cec5SDimitry Andric /// 40320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40330b57cec5SDimitry Andric /// 40340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> LFENCE </c> instruction. 40350b57cec5SDimitry Andric /// 40360b57cec5SDimitry Andric void _mm_lfence(void); 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load and store 40390b57cec5SDimitry Andric /// instructions preceding this instruction and load and store instructions 40400b57cec5SDimitry Andric /// following this instruction, ensuring that the system completes all 40410b57cec5SDimitry Andric /// previous memory accesses before executing subsequent memory accesses. 40420b57cec5SDimitry Andric /// 40430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40440b57cec5SDimitry Andric /// 40450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MFENCE </c> instruction. 40460b57cec5SDimitry Andric /// 40470b57cec5SDimitry Andric void _mm_mfence(void); 40480b57cec5SDimitry Andric 40490b57cec5SDimitry Andric #if defined(__cplusplus) 40500b57cec5SDimitry Andric } // extern "C" 40510b57cec5SDimitry Andric #endif 40520b57cec5SDimitry Andric 40530b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 40540b57cec5SDimitry Andric /// operands into 8-bit signed integers, and packs the results into the 40550b57cec5SDimitry Andric /// destination. Positive values greater than 0x7F are saturated to 0x7F. 40560b57cec5SDimitry Andric /// Negative values less than 0x80 are saturated to 0x80. 40570b57cec5SDimitry Andric /// 40580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40590b57cec5SDimitry Andric /// 40600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction. 40610b57cec5SDimitry Andric /// 40620b57cec5SDimitry Andric /// \param __a 40630b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. 
Each 16-bit element is treated as 40640b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 40650b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 40660b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 40670b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 40680b57cec5SDimitry Andric /// \param __b 40690b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 40700b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 40710b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 40720b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 40730b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 40740b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 407581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, 407681ad6265SDimitry Andric __m128i __b) { 40770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 40780b57cec5SDimitry Andric } 40790b57cec5SDimitry Andric 40800b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector 40810b57cec5SDimitry Andric /// operands into 16-bit signed integers, and packs the results into the 40820b57cec5SDimitry Andric /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. 40830b57cec5SDimitry Andric /// Negative values less than 0x8000 are saturated to 0x8000. 40840b57cec5SDimitry Andric /// 40850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40860b57cec5SDimitry Andric /// 40870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 
40880b57cec5SDimitry Andric /// 40890b57cec5SDimitry Andric /// \param __a 40900b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 40910b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 40920b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 40930b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 40940b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 40950b57cec5SDimitry Andric /// \param __b 40960b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 40970b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 40980b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 40990b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 41000b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 41010b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 410281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, 410381ad6265SDimitry Andric __m128i __b) { 41040b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 41050b57cec5SDimitry Andric } 41060b57cec5SDimitry Andric 41070b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 41080b57cec5SDimitry Andric /// operands into 8-bit unsigned integers, and packs the results into the 41090b57cec5SDimitry Andric /// destination. Values greater than 0xFF are saturated to 0xFF. Values less 41100b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. 
41110b57cec5SDimitry Andric /// 41120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41130b57cec5SDimitry Andric /// 41140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 41150b57cec5SDimitry Andric /// 41160b57cec5SDimitry Andric /// \param __a 41170b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 41180b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 41190b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 41200b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 41210b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 41220b57cec5SDimitry Andric /// \param __b 41230b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 41240b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 41250b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 41260b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 41270b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 41280b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 412981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, 413081ad6265SDimitry Andric __m128i __b) { 41310b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 41320b57cec5SDimitry Andric } 41330b57cec5SDimitry Andric 41340b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 41350b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector.
/// \param imm
///    An immediate value. Bits [2:0] select values from \a a to be assigned
///    to bits[15:0] of the result. \n
///    000: assign values from bits [15:0] of \a a. \n
///    001: assign values from bits [31:16] of \a a. \n
///    010: assign values from bits [47:32] of \a a. \n
///    011: assign values from bits [63:48] of \a a. \n
///    100: assign values from bits [79:64] of \a a. \n
///    101: assign values from bits [95:80] of \a a. \n
///    110: assign values from bits [111:96] of \a a. \n
///    111: assign values from bits [127:112] of \a a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm)                                              \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a),      \
                                                    (int)(imm)))

/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then inserting the lower 16 bits
///    of an integer parameter into an offset specified by the immediate-value
///    parameter.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a b.
/// \param b
///    An integer. The lower 16 bits of this parameter are written to the
///    result beginning at an offset specified by \a imm.
/// \param imm
///    An immediate value specifying the bit offset in the result at which the
///    lower 16 bits of \a b are written.
/// \returns A 128-bit integer vector containing the constructed values.
41880b57cec5SDimitry Andric #define _mm_insert_epi16(a, b, imm) \ 4189349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ 4190349cc55cSDimitry Andric (int)(imm))) 41910b57cec5SDimitry Andric 41920b57cec5SDimitry Andric /// Copies the values of the most significant bits from each 8-bit 41930b57cec5SDimitry Andric /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask 41940b57cec5SDimitry Andric /// value, zero-extends the value, and writes it to the destination. 41950b57cec5SDimitry Andric /// 41960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41970b57cec5SDimitry Andric /// 41980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction. 41990b57cec5SDimitry Andric /// 42000b57cec5SDimitry Andric /// \param __a 42010b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 42020b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 42030b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 420481ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { 42050b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 42060b57cec5SDimitry Andric } 42070b57cec5SDimitry Andric 42080b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 42090b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 42100b57cec5SDimitry Andric /// parameter as a specifier. 
42110b57cec5SDimitry Andric /// 42120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42130b57cec5SDimitry Andric /// 42140b57cec5SDimitry Andric /// \code 42150b57cec5SDimitry Andric /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); 42160b57cec5SDimitry Andric /// \endcode 42170b57cec5SDimitry Andric /// 42180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. 42190b57cec5SDimitry Andric /// 42200b57cec5SDimitry Andric /// \param a 42210b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 42220b57cec5SDimitry Andric /// \param imm 42230b57cec5SDimitry Andric /// An immediate value containing an 8-bit value specifying which elements to 42240b57cec5SDimitry Andric /// copy from a. The destinations within the 128-bit destination are assigned 42250b57cec5SDimitry Andric /// values as follows: \n 42260b57cec5SDimitry Andric /// Bits [1:0] are used to assign values to bits [31:0] of the result. \n 42270b57cec5SDimitry Andric /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n 42280b57cec5SDimitry Andric /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n 42290b57cec5SDimitry Andric /// Bits [7:6] are used to assign values to bits [127:96] of the result. \n 42300b57cec5SDimitry Andric /// Bit value assignments: \n 42310b57cec5SDimitry Andric /// 00: assign values from bits [31:0] of \a a. \n 42320b57cec5SDimitry Andric /// 01: assign values from bits [63:32] of \a a. \n 42330b57cec5SDimitry Andric /// 10: assign values from bits [95:64] of \a a. \n 423481ad6265SDimitry Andric /// 11: assign values from bits [127:96] of \a a. \n 423581ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. 423681ad6265SDimitry Andric /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form 423781ad6265SDimitry Andric /// <c>[b6, b4, b2, b0]</c>. 
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm)                                              \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
///    [127:64] of the result.
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [15:0] of \a a. \n
///    01: assign values from bits [31:16] of \a a. \n
///    10: assign values from bits [47:32] of \a a. \n
///    11: assign values from bits [63:48] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm)                                            \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
///    [63:0] of the result.
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [79:64] of \a a. \n
///    01: assign values from bits [95:80] of \a a. \n
///    10: assign values from bits [111:96] of \a a. \n
///    11: assign values from bits [127:112] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm)                                            \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))

/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
///   instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [7:0] of the result. \n
///    Bits [79:72] are written to bits [23:16] of the result. \n
///    Bits [87:80] are written to bits [39:32] of the result. \n
///    Bits [95:88] are written to bits [55:48] of the result. \n
///    Bits [103:96] are written to bits [71:64] of the result. \n
///    Bits [111:104] are written to bits [87:80] of the result. \n
///    Bits [119:112] are written to bits [103:96] of the result. \n
///    Bits [127:120] are written to bits [119:112] of the result.
/// \param __b
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [15:8] of the result. \n
///    Bits [79:72] are written to bits [31:24] of the result. \n
///    Bits [87:80] are written to bits [47:40] of the result. \n
///    Bits [95:88] are written to bits [63:56] of the result. \n
///    Bits [103:96] are written to bits [79:72] of the result. \n
///    Bits [111:104] are written to bits [95:88] of the result. \n
///    Bits [119:112] are written to bits [111:104] of the result. \n
///    Bits [127:120] are written to bits [127:120] of the result.
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
433781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, 433881ad6265SDimitry Andric __m128i __b) { 433981ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 434081ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 434181ad6265SDimitry Andric 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); 43420b57cec5SDimitry Andric } 43430b57cec5SDimitry Andric 43440b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 43450b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 43460b57cec5SDimitry Andric /// 43470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43480b57cec5SDimitry Andric /// 43490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 43500b57cec5SDimitry Andric /// instruction. 43510b57cec5SDimitry Andric /// 43520b57cec5SDimitry Andric /// \param __a 43530b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 43540b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 43550b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 43560b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 43570b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 43580b57cec5SDimitry Andric /// \param __b 43590b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 43600b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 43610b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 43620b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 43630b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 
43640b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 436581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, 436681ad6265SDimitry Andric __m128i __b) { 436781ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 436881ad6265SDimitry Andric 8 + 5, 6, 8 + 6, 7, 8 + 7); 43690b57cec5SDimitry Andric } 43700b57cec5SDimitry Andric 43710b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 43720b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 43730b57cec5SDimitry Andric /// 43740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43750b57cec5SDimitry Andric /// 43760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 43770b57cec5SDimitry Andric /// instruction. 43780b57cec5SDimitry Andric /// 43790b57cec5SDimitry Andric /// \param __a 43800b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 43810b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 43820b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 43830b57cec5SDimitry Andric /// \param __b 43840b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 43850b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 43860b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 43870b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
438881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, 438981ad6265SDimitry Andric __m128i __b) { 439081ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 439181ad6265SDimitry Andric 4 + 3); 43920b57cec5SDimitry Andric } 43930b57cec5SDimitry Andric 43940b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 43950b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 43960b57cec5SDimitry Andric /// 43970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43980b57cec5SDimitry Andric /// 43990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 44000b57cec5SDimitry Andric /// instruction. 44010b57cec5SDimitry Andric /// 44020b57cec5SDimitry Andric /// \param __a 44030b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 44040b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 44050b57cec5SDimitry Andric /// \param __b 44060b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 44070b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 44080b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 440981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, 441081ad6265SDimitry Andric __m128i __b) { 44110b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); 44120b57cec5SDimitry Andric } 44130b57cec5SDimitry Andric 44140b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 44150b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 
44160b57cec5SDimitry Andric /// 44170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44180b57cec5SDimitry Andric /// 44190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> 44200b57cec5SDimitry Andric /// instruction. 44210b57cec5SDimitry Andric /// 44220b57cec5SDimitry Andric /// \param __a 44230b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 44240b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 44250b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 44260b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. \n 44270b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. \n 44280b57cec5SDimitry Andric /// Bits [39:32] are written to bits [71:64] of the result. \n 44290b57cec5SDimitry Andric /// Bits [47:40] are written to bits [87:80] of the result. \n 44300b57cec5SDimitry Andric /// Bits [55:48] are written to bits [103:96] of the result. \n 44310b57cec5SDimitry Andric /// Bits [63:56] are written to bits [119:112] of the result. 44320b57cec5SDimitry Andric /// \param __b 44330b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 44340b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 44350b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 44360b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 44370b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. \n 44380b57cec5SDimitry Andric /// Bits [39:32] are written to bits [79:72] of the result. \n 44390b57cec5SDimitry Andric /// Bits [47:40] are written to bits [95:88] of the result. \n 44400b57cec5SDimitry Andric /// Bits [55:48] are written to bits [111:104] of the result. \n 44410b57cec5SDimitry Andric /// Bits [63:56] are written to bits [127:120] of the result. 
44420b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 444381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, 444481ad6265SDimitry Andric __m128i __b) { 444581ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 444681ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 444781ad6265SDimitry Andric 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); 44480b57cec5SDimitry Andric } 44490b57cec5SDimitry Andric 44500b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 44510b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 44520b57cec5SDimitry Andric /// [8 x i16]. 44530b57cec5SDimitry Andric /// 44540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44550b57cec5SDimitry Andric /// 44560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 44570b57cec5SDimitry Andric /// instruction. 44580b57cec5SDimitry Andric /// 44590b57cec5SDimitry Andric /// \param __a 44600b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44610b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 44620b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 44630b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 44640b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 44650b57cec5SDimitry Andric /// \param __b 44660b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44670b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 44680b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 44690b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. 
\n 44700b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 44710b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 447281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, 447381ad6265SDimitry Andric __m128i __b) { 447481ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 447581ad6265SDimitry Andric 8 + 1, 2, 8 + 2, 3, 8 + 3); 44760b57cec5SDimitry Andric } 44770b57cec5SDimitry Andric 44780b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 44790b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 44800b57cec5SDimitry Andric /// 44810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44820b57cec5SDimitry Andric /// 44830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 44840b57cec5SDimitry Andric /// instruction. 44850b57cec5SDimitry Andric /// 44860b57cec5SDimitry Andric /// \param __a 44870b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44880b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 44890b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 44900b57cec5SDimitry Andric /// \param __b 44910b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44920b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 44930b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 44940b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
449581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, 449681ad6265SDimitry Andric __m128i __b) { 449781ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 449881ad6265SDimitry Andric 4 + 1); 44990b57cec5SDimitry Andric } 45000b57cec5SDimitry Andric 45010b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 45020b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 45030b57cec5SDimitry Andric /// 45040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45050b57cec5SDimitry Andric /// 45060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 45070b57cec5SDimitry Andric /// instruction. 45080b57cec5SDimitry Andric /// 45090b57cec5SDimitry Andric /// \param __a 45100b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45110b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 45120b57cec5SDimitry Andric /// \param __b 45130b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45140b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 45150b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 451681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, 451781ad6265SDimitry Andric __m128i __b) { 45180b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); 45190b57cec5SDimitry Andric } 45200b57cec5SDimitry Andric 45210b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 45220b57cec5SDimitry Andric /// integer. 
45230b57cec5SDimitry Andric /// 45240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45250b57cec5SDimitry Andric /// 45260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 45270b57cec5SDimitry Andric /// 45280b57cec5SDimitry Andric /// \param __a 45290b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 45300b57cec5SDimitry Andric /// destination. 45310b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 453281ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { 45330b57cec5SDimitry Andric return (__m64)__a[0]; 45340b57cec5SDimitry Andric } 45350b57cec5SDimitry Andric 45360b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 45370b57cec5SDimitry Andric /// upper bits. 45380b57cec5SDimitry Andric /// 45390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45400b57cec5SDimitry Andric /// 45410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 45420b57cec5SDimitry Andric /// 45430b57cec5SDimitry Andric /// \param __a 45440b57cec5SDimitry Andric /// A 64-bit value. 45450b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 45460b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 454781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { 45480b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){(long long)__a, 0}; 45490b57cec5SDimitry Andric } 45500b57cec5SDimitry Andric 45510b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 45520b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 
45530b57cec5SDimitry Andric /// 45540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45550b57cec5SDimitry Andric /// 45560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 45570b57cec5SDimitry Andric /// 45580b57cec5SDimitry Andric /// \param __a 45590b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 45600b57cec5SDimitry Andric /// destination. 45610b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 45620b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 456381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { 45640b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 45650b57cec5SDimitry Andric } 45660b57cec5SDimitry Andric 45670b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 45680b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 45690b57cec5SDimitry Andric /// double]. 45700b57cec5SDimitry Andric /// 45710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45720b57cec5SDimitry Andric /// 45730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 45740b57cec5SDimitry Andric /// 45750b57cec5SDimitry Andric /// \param __a 45760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45770b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 45780b57cec5SDimitry Andric /// \param __b 45790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45800b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 45810b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
458281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, 458381ad6265SDimitry Andric __m128d __b) { 45840b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); 45850b57cec5SDimitry Andric } 45860b57cec5SDimitry Andric 45870b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 45880b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 45890b57cec5SDimitry Andric /// double]. 45900b57cec5SDimitry Andric /// 45910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45920b57cec5SDimitry Andric /// 45930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 45940b57cec5SDimitry Andric /// 45950b57cec5SDimitry Andric /// \param __a 45960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45970b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 45980b57cec5SDimitry Andric /// \param __b 45990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 46000b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 46010b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 460281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, 460381ad6265SDimitry Andric __m128d __b) { 46040b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); 46050b57cec5SDimitry Andric } 46060b57cec5SDimitry Andric 46070b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 46080b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 46090b57cec5SDimitry Andric /// low-order bits of the destination. 
46100b57cec5SDimitry Andric /// 46110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46120b57cec5SDimitry Andric /// 46130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 46140b57cec5SDimitry Andric /// 46150b57cec5SDimitry Andric /// \param __a 46160b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 46170b57cec5SDimitry Andric /// be extracted. 46180b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 46190b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 462081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { 46210b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 46220b57cec5SDimitry Andric } 46230b57cec5SDimitry Andric 46240b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 46250b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 46260b57cec5SDimitry Andric /// parameter as a specifier. 46270b57cec5SDimitry Andric /// 46280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46290b57cec5SDimitry Andric /// 46300b57cec5SDimitry Andric /// \code 46310b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 46320b57cec5SDimitry Andric /// \endcode 46330b57cec5SDimitry Andric /// 46340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 46350b57cec5SDimitry Andric /// 46360b57cec5SDimitry Andric /// \param a 46370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46380b57cec5SDimitry Andric /// \param b 46390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46400b57cec5SDimitry Andric /// \param i 46410b57cec5SDimitry Andric /// An 8-bit immediate value. 
The least significant two bits specify which 46420b57cec5SDimitry Andric /// elements to copy from \a a and \a b: \n 46430b57cec5SDimitry Andric /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n 46440b57cec5SDimitry Andric /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n 46450b57cec5SDimitry Andric /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n 46460b57cec5SDimitry Andric /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n 464781ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. 464881ad6265SDimitry Andric /// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form 464981ad6265SDimitry Andric /// <c>[b1, b0]</c>. 46500b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 46510b57cec5SDimitry Andric #define _mm_shuffle_pd(a, b, i) \ 4652349cc55cSDimitry Andric ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ 4653349cc55cSDimitry Andric (int)(i))) 46540b57cec5SDimitry Andric 46550b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 46560b57cec5SDimitry Andric /// floating-point vector of [4 x float]. 46570b57cec5SDimitry Andric /// 46580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46590b57cec5SDimitry Andric /// 46600b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46610b57cec5SDimitry Andric /// 46620b57cec5SDimitry Andric /// \param __a 46630b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 46640b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 46650b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
466681ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { 46670b57cec5SDimitry Andric return (__m128)__a; 46680b57cec5SDimitry Andric } 46690b57cec5SDimitry Andric 46700b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 46710b57cec5SDimitry Andric /// integer vector. 46720b57cec5SDimitry Andric /// 46730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46740b57cec5SDimitry Andric /// 46750b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46760b57cec5SDimitry Andric /// 46770b57cec5SDimitry Andric /// \param __a 46780b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 46790b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 46800b57cec5SDimitry Andric /// parameter. 468181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { 46820b57cec5SDimitry Andric return (__m128i)__a; 46830b57cec5SDimitry Andric } 46840b57cec5SDimitry Andric 46850b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 46860b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 46870b57cec5SDimitry Andric /// 46880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46890b57cec5SDimitry Andric /// 46900b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46910b57cec5SDimitry Andric /// 46920b57cec5SDimitry Andric /// \param __a 46930b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 46940b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 46950b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
469681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { 46970b57cec5SDimitry Andric return (__m128d)__a; 46980b57cec5SDimitry Andric } 46990b57cec5SDimitry Andric 47000b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 47010b57cec5SDimitry Andric /// integer vector. 47020b57cec5SDimitry Andric /// 47030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47040b57cec5SDimitry Andric /// 47050b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47060b57cec5SDimitry Andric /// 47070b57cec5SDimitry Andric /// \param __a 47080b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 47090b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 47100b57cec5SDimitry Andric /// parameter. 471181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { 47120b57cec5SDimitry Andric return (__m128i)__a; 47130b57cec5SDimitry Andric } 47140b57cec5SDimitry Andric 47150b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47160b57cec5SDimitry Andric /// of [4 x float]. 47170b57cec5SDimitry Andric /// 47180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47190b57cec5SDimitry Andric /// 47200b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47210b57cec5SDimitry Andric /// 47220b57cec5SDimitry Andric /// \param __a 47230b57cec5SDimitry Andric /// A 128-bit integer vector. 47240b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 47250b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
472681ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { 47270b57cec5SDimitry Andric return (__m128)__a; 47280b57cec5SDimitry Andric } 47290b57cec5SDimitry Andric 47300b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47310b57cec5SDimitry Andric /// of [2 x double]. 47320b57cec5SDimitry Andric /// 47330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47340b57cec5SDimitry Andric /// 47350b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47360b57cec5SDimitry Andric /// 47370b57cec5SDimitry Andric /// \param __a 47380b57cec5SDimitry Andric /// A 128-bit integer vector. 47390b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 47400b57cec5SDimitry Andric /// bitwise pattern as the parameter. 474181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { 47420b57cec5SDimitry Andric return (__m128d)__a; 47430b57cec5SDimitry Andric } 47440b57cec5SDimitry Andric 47450b57cec5SDimitry Andric #if defined(__cplusplus) 47460b57cec5SDimitry Andric extern "C" { 47470b57cec5SDimitry Andric #endif 47480b57cec5SDimitry Andric 47490b57cec5SDimitry Andric /// Indicates that a spin loop is being executed for the purposes of 47500b57cec5SDimitry Andric /// optimizing power consumption during the loop. 47510b57cec5SDimitry Andric /// 47520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47530b57cec5SDimitry Andric /// 47540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAUSE </c> instruction. 
47550b57cec5SDimitry Andric /// 47560b57cec5SDimitry Andric void _mm_pause(void); 47570b57cec5SDimitry Andric 47580b57cec5SDimitry Andric #if defined(__cplusplus) 47590b57cec5SDimitry Andric } // extern "C" 47600b57cec5SDimitry Andric #endif 47610b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 47620b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 47630b57cec5SDimitry Andric 47640b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 47650b57cec5SDimitry Andric 47665ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_ON (0x0040U) 47675ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_OFF (0x0000U) 47680b57cec5SDimitry Andric 47695ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_MASK (0x0040U) 47700b57cec5SDimitry Andric 47710b57cec5SDimitry Andric #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 477281ad6265SDimitry Andric #define _MM_SET_DENORMALS_ZERO_MODE(x) \ 477381ad6265SDimitry Andric (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 47740b57cec5SDimitry Andric 47750b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */ 4776