10b57cec5SDimitry Andric /*===---- emmintrin.h - Implementation of SSE2 intrinsics on PowerPC -------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric /* Implemented from the specification included in the Intel C++ Compiler 110b57cec5SDimitry Andric User Guide and Reference, version 9.0. */ 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef NO_WARN_X86_INTRINSICS 140b57cec5SDimitry Andric /* This header file is to help porting code using Intel intrinsics 150b57cec5SDimitry Andric explicitly from x86_64 to powerpc64/powerpc64le. 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric Since X86 SSE2 intrinsics mainly handles __m128i and __m128d type, 180b57cec5SDimitry Andric PowerPC VMX/VSX ISA is a good match for vector float SIMD operations. 190b57cec5SDimitry Andric However scalar float operations in vector (XMM) registers require 200b57cec5SDimitry Andric the POWER8 VSX ISA (2.07) level. There are differences for data 210b57cec5SDimitry Andric format and placement of float scalars in the vector register, which 220b57cec5SDimitry Andric require extra steps to match SSE2 scalar float semantics on POWER. 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric It should be noted that there's much difference between X86_64's 250b57cec5SDimitry Andric MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use 260b57cec5SDimitry Andric portable <fenv.h> instead of access MXSCR directly. 
270b57cec5SDimitry Andric 280b57cec5SDimitry Andric Most SSE2 scalar float intrinsic operations can be performed more 290b57cec5SDimitry Andric efficiently as C language float scalar operations or optimized to 300b57cec5SDimitry Andric use vector SIMD operations. We recommend this for new applications. 310b57cec5SDimitry Andric */ 3281ad6265SDimitry Andric #error \ 3381ad6265SDimitry Andric "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 340b57cec5SDimitry Andric #endif 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric #ifndef EMMINTRIN_H_ 370b57cec5SDimitry Andric #define EMMINTRIN_H_ 380b57cec5SDimitry Andric 39bdd1243dSDimitry Andric #if defined(__powerpc64__) && \ 40fcaf7f86SDimitry Andric (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) 41a7dea167SDimitry Andric 420b57cec5SDimitry Andric #include <altivec.h> 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /* We need definitions from the SSE header files. */ 450b57cec5SDimitry Andric #include <xmmintrin.h> 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric /* SSE2 */ 480b57cec5SDimitry Andric typedef __vector double __v2df; 49*06c3fb27SDimitry Andric typedef __vector float __v4f; 500b57cec5SDimitry Andric typedef __vector long long __v2di; 510b57cec5SDimitry Andric typedef __vector unsigned long long __v2du; 520b57cec5SDimitry Andric typedef __vector int __v4si; 530b57cec5SDimitry Andric typedef __vector unsigned int __v4su; 540b57cec5SDimitry Andric typedef __vector short __v8hi; 550b57cec5SDimitry Andric typedef __vector unsigned short __v8hu; 560b57cec5SDimitry Andric typedef __vector signed char __v16qi; 570b57cec5SDimitry Andric typedef __vector unsigned char __v16qu; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric /* The Intel API is flexible enough that we must allow aliasing with other 600b57cec5SDimitry Andric vector types, and their scalar components. 
*/ 610b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); 620b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric /* Unaligned version of the same types. */ 6581ad6265SDimitry Andric typedef long long __m128i_u 6681ad6265SDimitry Andric __attribute__((__vector_size__(16), __may_alias__, __aligned__(1))); 6781ad6265SDimitry Andric typedef double __m128d_u 6881ad6265SDimitry Andric __attribute__((__vector_size__(16), __may_alias__, __aligned__(1))); 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric /* Define two value permute mask. */ 710b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric /* Create a vector with element 0 as F and the rest zero. */ 7481ad6265SDimitry Andric extern __inline __m128d 7581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_sd(double __F)7681ad6265SDimitry Andric _mm_set_sd(double __F) { 770b57cec5SDimitry Andric return __extension__(__m128d){__F, 0.0}; 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /* Create a vector with both elements equal to F. 
*/ 8181ad6265SDimitry Andric extern __inline __m128d 8281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pd(double __F)8381ad6265SDimitry Andric _mm_set1_pd(double __F) { 840b57cec5SDimitry Andric return __extension__(__m128d){__F, __F}; 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 8781ad6265SDimitry Andric extern __inline __m128d 8881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd1(double __F)8981ad6265SDimitry Andric _mm_set_pd1(double __F) { 900b57cec5SDimitry Andric return _mm_set1_pd(__F); 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric /* Create a vector with the lower value X and upper value W. */ 9481ad6265SDimitry Andric extern __inline __m128d 9581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd(double __W,double __X)9681ad6265SDimitry Andric _mm_set_pd(double __W, double __X) { 970b57cec5SDimitry Andric return __extension__(__m128d){__X, __W}; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric /* Create a vector with the lower value W and upper value X. */ 10181ad6265SDimitry Andric extern __inline __m128d 10281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pd(double __W,double __X)10381ad6265SDimitry Andric _mm_setr_pd(double __W, double __X) { 1040b57cec5SDimitry Andric return __extension__(__m128d){__W, __X}; 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric /* Create an undefined vector. 
*/ 10881ad6265SDimitry Andric extern __inline __m128d 10981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_undefined_pd(void)11081ad6265SDimitry Andric _mm_undefined_pd(void) { 1110b57cec5SDimitry Andric __m128d __Y = __Y; 1120b57cec5SDimitry Andric return __Y; 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric /* Create a vector of zeros. */ 11681ad6265SDimitry Andric extern __inline __m128d 11781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_pd(void)11881ad6265SDimitry Andric _mm_setzero_pd(void) { 1190b57cec5SDimitry Andric return (__m128d)vec_splats(0); 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric /* Sets the low DPFP value of A from the low value of B. */ 12381ad6265SDimitry Andric extern __inline __m128d 12481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_sd(__m128d __A,__m128d __B)12581ad6265SDimitry Andric _mm_move_sd(__m128d __A, __m128d __B) { 12681ad6265SDimitry Andric __v2df __result = (__v2df)__A; 12781ad6265SDimitry Andric __result[0] = ((__v2df)__B)[0]; 12881ad6265SDimitry Andric return (__m128d)__result; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric /* Load two DPFP values from P. The address must be 16-byte aligned. */ 13281ad6265SDimitry Andric extern __inline __m128d 13381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd(double const * __P)13481ad6265SDimitry Andric _mm_load_pd(double const *__P) { 1350b57cec5SDimitry Andric return ((__m128d)vec_ld(0, (__v16qu *)__P)); 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric /* Load two DPFP values from P. The address need not be 16-byte aligned. 
*/ 13981ad6265SDimitry Andric extern __inline __m128d 14081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_pd(double const * __P)14181ad6265SDimitry Andric _mm_loadu_pd(double const *__P) { 1420b57cec5SDimitry Andric return (vec_vsx_ld(0, __P)); 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric /* Create a vector with all two elements equal to *P. */ 14681ad6265SDimitry Andric extern __inline __m128d 14781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load1_pd(double const * __P)14881ad6265SDimitry Andric _mm_load1_pd(double const *__P) { 1490b57cec5SDimitry Andric return (vec_splats(*__P)); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric /* Create a vector with element 0 as *P and the rest zero. */ 15381ad6265SDimitry Andric extern __inline __m128d 15481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_sd(double const * __P)15581ad6265SDimitry Andric _mm_load_sd(double const *__P) { 1560b57cec5SDimitry Andric return _mm_set_sd(*__P); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 15981ad6265SDimitry Andric extern __inline __m128d 16081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd1(double const * __P)16181ad6265SDimitry Andric _mm_load_pd1(double const *__P) { 1620b57cec5SDimitry Andric return _mm_load1_pd(__P); 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric /* Load two DPFP values in reverse order. The address must be aligned. 
*/ 16681ad6265SDimitry Andric extern __inline __m128d 16781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadr_pd(double const * __P)16881ad6265SDimitry Andric _mm_loadr_pd(double const *__P) { 1690b57cec5SDimitry Andric __v2df __tmp = _mm_load_pd(__P); 1700b57cec5SDimitry Andric return (__m128d)vec_xxpermdi(__tmp, __tmp, 2); 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric /* Store two DPFP values. The address must be 16-byte aligned. */ 17481ad6265SDimitry Andric extern __inline void 17581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd(double * __P,__m128d __A)17681ad6265SDimitry Andric _mm_store_pd(double *__P, __m128d __A) { 1770b57cec5SDimitry Andric vec_st((__v16qu)__A, 0, (__v16qu *)__P); 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric /* Store two DPFP values. The address need not be 16-byte aligned. */ 18181ad6265SDimitry Andric extern __inline void 18281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_pd(double * __P,__m128d __A)18381ad6265SDimitry Andric _mm_storeu_pd(double *__P, __m128d __A) { 1840b57cec5SDimitry Andric *(__m128d_u *)__P = __A; 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* Stores the lower DPFP value. 
*/ 18881ad6265SDimitry Andric extern __inline void 18981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd(double * __P,__m128d __A)19081ad6265SDimitry Andric _mm_store_sd(double *__P, __m128d __A) { 1910b57cec5SDimitry Andric *__P = ((__v2df)__A)[0]; 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 19481ad6265SDimitry Andric extern __inline double 19581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_f64(__m128d __A)19681ad6265SDimitry Andric _mm_cvtsd_f64(__m128d __A) { 1970b57cec5SDimitry Andric return ((__v2df)__A)[0]; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 20081ad6265SDimitry Andric extern __inline void 20181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pd(double * __P,__m128d __A)20281ad6265SDimitry Andric _mm_storel_pd(double *__P, __m128d __A) { 2030b57cec5SDimitry Andric _mm_store_sd(__P, __A); 2040b57cec5SDimitry Andric } 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric /* Stores the upper DPFP value. */ 20781ad6265SDimitry Andric extern __inline void 20881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pd(double * __P,__m128d __A)20981ad6265SDimitry Andric _mm_storeh_pd(double *__P, __m128d __A) { 2100b57cec5SDimitry Andric *__P = ((__v2df)__A)[1]; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric /* Store the lower DPFP value across two words. 2130b57cec5SDimitry Andric The address must be 16-byte aligned. 
*/ 21481ad6265SDimitry Andric extern __inline void 21581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store1_pd(double * __P,__m128d __A)21681ad6265SDimitry Andric _mm_store1_pd(double *__P, __m128d __A) { 2170b57cec5SDimitry Andric _mm_store_pd(__P, vec_splat(__A, 0)); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 22081ad6265SDimitry Andric extern __inline void 22181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd1(double * __P,__m128d __A)22281ad6265SDimitry Andric _mm_store_pd1(double *__P, __m128d __A) { 2230b57cec5SDimitry Andric _mm_store1_pd(__P, __A); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric /* Store two DPFP values in reverse order. The address must be aligned. */ 22781ad6265SDimitry Andric extern __inline void 22881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storer_pd(double * __P,__m128d __A)22981ad6265SDimitry Andric _mm_storer_pd(double *__P, __m128d __A) { 2300b57cec5SDimitry Andric _mm_store_pd(__P, vec_xxpermdi(__A, __A, 2)); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric /* Intel intrinsic. */ 23481ad6265SDimitry Andric extern __inline long long 23581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64(__m128i __A)23681ad6265SDimitry Andric _mm_cvtsi128_si64(__m128i __A) { 2370b57cec5SDimitry Andric return ((__v2di)__A)[0]; 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric /* Microsoft intrinsic. 
*/ 24181ad6265SDimitry Andric extern __inline long long 24281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64x(__m128i __A)24381ad6265SDimitry Andric _mm_cvtsi128_si64x(__m128i __A) { 2440b57cec5SDimitry Andric return ((__v2di)__A)[0]; 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 24781ad6265SDimitry Andric extern __inline __m128d 24881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pd(__m128d __A,__m128d __B)24981ad6265SDimitry Andric _mm_add_pd(__m128d __A, __m128d __B) { 2500b57cec5SDimitry Andric return (__m128d)((__v2df)__A + (__v2df)__B); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric /* Add the lower double-precision (64-bit) floating-point element in 2540b57cec5SDimitry Andric a and b, store the result in the lower element of dst, and copy 2550b57cec5SDimitry Andric the upper element from a to the upper element of dst. */ 25681ad6265SDimitry Andric extern __inline __m128d 25781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_sd(__m128d __A,__m128d __B)25881ad6265SDimitry Andric _mm_add_sd(__m128d __A, __m128d __B) { 2590b57cec5SDimitry Andric __A[0] = __A[0] + __B[0]; 2600b57cec5SDimitry Andric return (__A); 2610b57cec5SDimitry Andric } 2620b57cec5SDimitry Andric 26381ad6265SDimitry Andric extern __inline __m128d 26481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pd(__m128d __A,__m128d __B)26581ad6265SDimitry Andric _mm_sub_pd(__m128d __A, __m128d __B) { 2660b57cec5SDimitry Andric return (__m128d)((__v2df)__A - (__v2df)__B); 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 26981ad6265SDimitry Andric extern __inline __m128d 27081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_sd(__m128d __A,__m128d __B)27181ad6265SDimitry Andric _mm_sub_sd(__m128d __A, 
__m128d __B) { 2720b57cec5SDimitry Andric __A[0] = __A[0] - __B[0]; 2730b57cec5SDimitry Andric return (__A); 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric 27681ad6265SDimitry Andric extern __inline __m128d 27781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_pd(__m128d __A,__m128d __B)27881ad6265SDimitry Andric _mm_mul_pd(__m128d __A, __m128d __B) { 2790b57cec5SDimitry Andric return (__m128d)((__v2df)__A * (__v2df)__B); 2800b57cec5SDimitry Andric } 2810b57cec5SDimitry Andric 28281ad6265SDimitry Andric extern __inline __m128d 28381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_sd(__m128d __A,__m128d __B)28481ad6265SDimitry Andric _mm_mul_sd(__m128d __A, __m128d __B) { 2850b57cec5SDimitry Andric __A[0] = __A[0] * __B[0]; 2860b57cec5SDimitry Andric return (__A); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 28981ad6265SDimitry Andric extern __inline __m128d 29081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_pd(__m128d __A,__m128d __B)29181ad6265SDimitry Andric _mm_div_pd(__m128d __A, __m128d __B) { 2920b57cec5SDimitry Andric return (__m128d)((__v2df)__A / (__v2df)__B); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric 29581ad6265SDimitry Andric extern __inline __m128d 29681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_sd(__m128d __A,__m128d __B)29781ad6265SDimitry Andric _mm_div_sd(__m128d __A, __m128d __B) { 2980b57cec5SDimitry Andric __A[0] = __A[0] / __B[0]; 2990b57cec5SDimitry Andric return (__A); 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 30281ad6265SDimitry Andric extern __inline __m128d 30381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_pd(__m128d __A)30481ad6265SDimitry Andric _mm_sqrt_pd(__m128d __A) { 3050b57cec5SDimitry Andric return (vec_sqrt(__A)); 3060b57cec5SDimitry 
Andric } 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric /* Return pair {sqrt (B[0]), A[1]}. */ 30981ad6265SDimitry Andric extern __inline __m128d 31081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_sd(__m128d __A,__m128d __B)31181ad6265SDimitry Andric _mm_sqrt_sd(__m128d __A, __m128d __B) { 31281ad6265SDimitry Andric __v2df __c; 31381ad6265SDimitry Andric __c = vec_sqrt((__v2df)_mm_set1_pd(__B[0])); 31481ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 31781ad6265SDimitry Andric extern __inline __m128d 31881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pd(__m128d __A,__m128d __B)31981ad6265SDimitry Andric _mm_min_pd(__m128d __A, __m128d __B) { 3200b57cec5SDimitry Andric return (vec_min(__A, __B)); 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric 32381ad6265SDimitry Andric extern __inline __m128d 32481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_sd(__m128d __A,__m128d __B)32581ad6265SDimitry Andric _mm_min_sd(__m128d __A, __m128d __B) { 32681ad6265SDimitry Andric __v2df __a, __b, __c; 32781ad6265SDimitry Andric __a = vec_splats(__A[0]); 32881ad6265SDimitry Andric __b = vec_splats(__B[0]); 32981ad6265SDimitry Andric __c = vec_min(__a, __b); 33081ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric 33381ad6265SDimitry Andric extern __inline __m128d 33481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pd(__m128d __A,__m128d __B)33581ad6265SDimitry Andric _mm_max_pd(__m128d __A, __m128d __B) { 3360b57cec5SDimitry Andric return (vec_max(__A, __B)); 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric 33981ad6265SDimitry Andric extern __inline __m128d 34081ad6265SDimitry Andric __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_max_sd(__m128d __A,__m128d __B)34181ad6265SDimitry Andric _mm_max_sd(__m128d __A, __m128d __B) { 34281ad6265SDimitry Andric __v2df __a, __b, __c; 34381ad6265SDimitry Andric __a = vec_splats(__A[0]); 34481ad6265SDimitry Andric __b = vec_splats(__B[0]); 34581ad6265SDimitry Andric __c = vec_max(__a, __b); 34681ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 34981ad6265SDimitry Andric extern __inline __m128d 35081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pd(__m128d __A,__m128d __B)35181ad6265SDimitry Andric _mm_cmpeq_pd(__m128d __A, __m128d __B) { 3520b57cec5SDimitry Andric return ((__m128d)vec_cmpeq((__v2df)__A, (__v2df)__B)); 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 35581ad6265SDimitry Andric extern __inline __m128d 35681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_pd(__m128d __A,__m128d __B)35781ad6265SDimitry Andric _mm_cmplt_pd(__m128d __A, __m128d __B) { 3580b57cec5SDimitry Andric return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B)); 3590b57cec5SDimitry Andric } 3600b57cec5SDimitry Andric 36181ad6265SDimitry Andric extern __inline __m128d 36281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_pd(__m128d __A,__m128d __B)36381ad6265SDimitry Andric _mm_cmple_pd(__m128d __A, __m128d __B) { 3640b57cec5SDimitry Andric return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B)); 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric 36781ad6265SDimitry Andric extern __inline __m128d 36881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pd(__m128d __A,__m128d __B)36981ad6265SDimitry Andric _mm_cmpgt_pd(__m128d __A, __m128d __B) { 3700b57cec5SDimitry Andric return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B)); 3710b57cec5SDimitry Andric } 
3720b57cec5SDimitry Andric 37381ad6265SDimitry Andric extern __inline __m128d 37481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_pd(__m128d __A,__m128d __B)37581ad6265SDimitry Andric _mm_cmpge_pd(__m128d __A, __m128d __B) { 3760b57cec5SDimitry Andric return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B)); 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 37981ad6265SDimitry Andric extern __inline __m128d 38081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_pd(__m128d __A,__m128d __B)38181ad6265SDimitry Andric _mm_cmpneq_pd(__m128d __A, __m128d __B) { 38281ad6265SDimitry Andric __v2df __temp = (__v2df)vec_cmpeq((__v2df)__A, (__v2df)__B); 38381ad6265SDimitry Andric return ((__m128d)vec_nor(__temp, __temp)); 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric 38681ad6265SDimitry Andric extern __inline __m128d 38781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_pd(__m128d __A,__m128d __B)38881ad6265SDimitry Andric _mm_cmpnlt_pd(__m128d __A, __m128d __B) { 3890b57cec5SDimitry Andric return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B)); 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 39281ad6265SDimitry Andric extern __inline __m128d 39381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_pd(__m128d __A,__m128d __B)39481ad6265SDimitry Andric _mm_cmpnle_pd(__m128d __A, __m128d __B) { 3950b57cec5SDimitry Andric return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B)); 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric 39881ad6265SDimitry Andric extern __inline __m128d 39981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_pd(__m128d __A,__m128d __B)40081ad6265SDimitry Andric _mm_cmpngt_pd(__m128d __A, __m128d __B) { 4010b57cec5SDimitry Andric return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B)); 
4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric 40481ad6265SDimitry Andric extern __inline __m128d 40581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_pd(__m128d __A,__m128d __B)40681ad6265SDimitry Andric _mm_cmpnge_pd(__m128d __A, __m128d __B) { 4070b57cec5SDimitry Andric return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B)); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 41081ad6265SDimitry Andric extern __inline __m128d 41181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_pd(__m128d __A,__m128d __B)41281ad6265SDimitry Andric _mm_cmpord_pd(__m128d __A, __m128d __B) { 41381ad6265SDimitry Andric __v2du __c, __d; 4140b57cec5SDimitry Andric /* Compare against self will return false (0's) if NAN. */ 41581ad6265SDimitry Andric __c = (__v2du)vec_cmpeq(__A, __A); 41681ad6265SDimitry Andric __d = (__v2du)vec_cmpeq(__B, __B); 4170b57cec5SDimitry Andric /* A != NAN and B != NAN. */ 41881ad6265SDimitry Andric return ((__m128d)vec_and(__c, __d)); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 42181ad6265SDimitry Andric extern __inline __m128d 42281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_pd(__m128d __A,__m128d __B)42381ad6265SDimitry Andric _mm_cmpunord_pd(__m128d __A, __m128d __B) { 4240b57cec5SDimitry Andric #if _ARCH_PWR8 42581ad6265SDimitry Andric __v2du __c, __d; 4260b57cec5SDimitry Andric /* Compare against self will return false (0's) if NAN. */ 42781ad6265SDimitry Andric __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A); 42881ad6265SDimitry Andric __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B); 4290b57cec5SDimitry Andric /* A == NAN OR B == NAN converts too: 4300b57cec5SDimitry Andric NOT(A != NAN) OR NOT(B != NAN). 
*/ 43181ad6265SDimitry Andric __c = vec_nor(__c, __c); 43281ad6265SDimitry Andric return ((__m128d)vec_orc(__c, __d)); 4330b57cec5SDimitry Andric #else 43481ad6265SDimitry Andric __v2du __c, __d; 4350b57cec5SDimitry Andric /* Compare against self will return false (0's) if NAN. */ 43681ad6265SDimitry Andric __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A); 43781ad6265SDimitry Andric __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B); 4380b57cec5SDimitry Andric /* Convert the true ('1's) is NAN. */ 43981ad6265SDimitry Andric __c = vec_nor(__c, __c); 44081ad6265SDimitry Andric __d = vec_nor(__d, __d); 44181ad6265SDimitry Andric return ((__m128d)vec_or(__c, __d)); 4420b57cec5SDimitry Andric #endif 4430b57cec5SDimitry Andric } 4440b57cec5SDimitry Andric 44581ad6265SDimitry Andric extern __inline __m128d 44681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_sd(__m128d __A,__m128d __B)44781ad6265SDimitry Andric _mm_cmpeq_sd(__m128d __A, __m128d __B) { 44881ad6265SDimitry Andric __v2df __a, __b, __c; 4490b57cec5SDimitry Andric /* PowerISA VSX does not allow partial (for just lower double) 4500b57cec5SDimitry Andric results. So to insure we don't generate spurious exceptions 4510b57cec5SDimitry Andric (from the upper double values) we splat the lower double 4520b57cec5SDimitry Andric before we do the operation. */ 45381ad6265SDimitry Andric __a = vec_splats(__A[0]); 45481ad6265SDimitry Andric __b = vec_splats(__B[0]); 45581ad6265SDimitry Andric __c = (__v2df)vec_cmpeq(__a, __b); 4560b57cec5SDimitry Andric /* Then we merge the lower double result with the original upper 4570b57cec5SDimitry Andric double from __A. 
*/ 45881ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 46181ad6265SDimitry Andric extern __inline __m128d 46281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_sd(__m128d __A,__m128d __B)46381ad6265SDimitry Andric _mm_cmplt_sd(__m128d __A, __m128d __B) { 46481ad6265SDimitry Andric __v2df __a, __b, __c; 46581ad6265SDimitry Andric __a = vec_splats(__A[0]); 46681ad6265SDimitry Andric __b = vec_splats(__B[0]); 46781ad6265SDimitry Andric __c = (__v2df)vec_cmplt(__a, __b); 46881ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric 47181ad6265SDimitry Andric extern __inline __m128d 47281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_sd(__m128d __A,__m128d __B)47381ad6265SDimitry Andric _mm_cmple_sd(__m128d __A, __m128d __B) { 47481ad6265SDimitry Andric __v2df __a, __b, __c; 47581ad6265SDimitry Andric __a = vec_splats(__A[0]); 47681ad6265SDimitry Andric __b = vec_splats(__B[0]); 47781ad6265SDimitry Andric __c = (__v2df)vec_cmple(__a, __b); 47881ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric 48181ad6265SDimitry Andric extern __inline __m128d 48281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_sd(__m128d __A,__m128d __B)48381ad6265SDimitry Andric _mm_cmpgt_sd(__m128d __A, __m128d __B) { 48481ad6265SDimitry Andric __v2df __a, __b, __c; 48581ad6265SDimitry Andric __a = vec_splats(__A[0]); 48681ad6265SDimitry Andric __b = vec_splats(__B[0]); 48781ad6265SDimitry Andric __c = (__v2df)vec_cmpgt(__a, __b); 48881ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 49181ad6265SDimitry Andric extern __inline __m128d 49281ad6265SDimitry Andric 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_sd(__m128d __A,__m128d __B)49381ad6265SDimitry Andric _mm_cmpge_sd(__m128d __A, __m128d __B) { 49481ad6265SDimitry Andric __v2df __a, __b, __c; 49581ad6265SDimitry Andric __a = vec_splats(__A[0]); 49681ad6265SDimitry Andric __b = vec_splats(__B[0]); 49781ad6265SDimitry Andric __c = (__v2df)vec_cmpge(__a, __b); 49881ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric 50181ad6265SDimitry Andric extern __inline __m128d 50281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_sd(__m128d __A,__m128d __B)50381ad6265SDimitry Andric _mm_cmpneq_sd(__m128d __A, __m128d __B) { 50481ad6265SDimitry Andric __v2df __a, __b, __c; 50581ad6265SDimitry Andric __a = vec_splats(__A[0]); 50681ad6265SDimitry Andric __b = vec_splats(__B[0]); 50781ad6265SDimitry Andric __c = (__v2df)vec_cmpeq(__a, __b); 50881ad6265SDimitry Andric __c = vec_nor(__c, __c); 50981ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 51281ad6265SDimitry Andric extern __inline __m128d 51381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_sd(__m128d __A,__m128d __B)51481ad6265SDimitry Andric _mm_cmpnlt_sd(__m128d __A, __m128d __B) { 51581ad6265SDimitry Andric __v2df __a, __b, __c; 51681ad6265SDimitry Andric __a = vec_splats(__A[0]); 51781ad6265SDimitry Andric __b = vec_splats(__B[0]); 5180b57cec5SDimitry Andric /* Not less than is just greater than or equal. 
*/ 51981ad6265SDimitry Andric __c = (__v2df)vec_cmpge(__a, __b); 52081ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 52381ad6265SDimitry Andric extern __inline __m128d 52481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_sd(__m128d __A,__m128d __B)52581ad6265SDimitry Andric _mm_cmpnle_sd(__m128d __A, __m128d __B) { 52681ad6265SDimitry Andric __v2df __a, __b, __c; 52781ad6265SDimitry Andric __a = vec_splats(__A[0]); 52881ad6265SDimitry Andric __b = vec_splats(__B[0]); 5290b57cec5SDimitry Andric /* Not less than or equal is just greater than. */ 53081ad6265SDimitry Andric __c = (__v2df)vec_cmpge(__a, __b); 53181ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 5320b57cec5SDimitry Andric } 5330b57cec5SDimitry Andric 53481ad6265SDimitry Andric extern __inline __m128d 53581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_sd(__m128d __A,__m128d __B)53681ad6265SDimitry Andric _mm_cmpngt_sd(__m128d __A, __m128d __B) { 53781ad6265SDimitry Andric __v2df __a, __b, __c; 53881ad6265SDimitry Andric __a = vec_splats(__A[0]); 53981ad6265SDimitry Andric __b = vec_splats(__B[0]); 5400b57cec5SDimitry Andric /* Not greater than is just less than or equal. 
*/ 54181ad6265SDimitry Andric __c = (__v2df)vec_cmple(__a, __b); 54281ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 54581ad6265SDimitry Andric extern __inline __m128d 54681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_sd(__m128d __A,__m128d __B)54781ad6265SDimitry Andric _mm_cmpnge_sd(__m128d __A, __m128d __B) { 54881ad6265SDimitry Andric __v2df __a, __b, __c; 54981ad6265SDimitry Andric __a = vec_splats(__A[0]); 55081ad6265SDimitry Andric __b = vec_splats(__B[0]); 5510b57cec5SDimitry Andric /* Not greater than or equal is just less than. */ 55281ad6265SDimitry Andric __c = (__v2df)vec_cmplt(__a, __b); 55381ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__c[0], __A[1]); 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 55681ad6265SDimitry Andric extern __inline __m128d 55781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_sd(__m128d __A,__m128d __B)55881ad6265SDimitry Andric _mm_cmpord_sd(__m128d __A, __m128d __B) { 55981ad6265SDimitry Andric __v2df __r; 56081ad6265SDimitry Andric __r = (__v2df)_mm_cmpord_pd(vec_splats(__A[0]), vec_splats(__B[0])); 56181ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__r[0], ((__v2df)__A)[1]); 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric 56481ad6265SDimitry Andric extern __inline __m128d 56581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_sd(__m128d __A,__m128d __B)56681ad6265SDimitry Andric _mm_cmpunord_sd(__m128d __A, __m128d __B) { 56781ad6265SDimitry Andric __v2df __r; 56881ad6265SDimitry Andric __r = _mm_cmpunord_pd(vec_splats(__A[0]), vec_splats(__B[0])); 56981ad6265SDimitry Andric return (__m128d)_mm_setr_pd(__r[0], __A[1]); 5700b57cec5SDimitry Andric } 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric /* FIXME 5730b57cec5SDimitry Andric The __mm_comi??_sd and 
__mm_ucomi??_sd implementations below are 5740b57cec5SDimitry Andric exactly the same because GCC for PowerPC only generates unordered 5750b57cec5SDimitry Andric compares (scalar and vector). 5760b57cec5SDimitry Andric Technically __mm_comieq_sp et all should be using the ordered 5770b57cec5SDimitry Andric compare and signal for QNaNs. The __mm_ucomieq_sd et all should 5780b57cec5SDimitry Andric be OK. */ 57981ad6265SDimitry Andric extern __inline int 58081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comieq_sd(__m128d __A,__m128d __B)58181ad6265SDimitry Andric _mm_comieq_sd(__m128d __A, __m128d __B) { 5820b57cec5SDimitry Andric return (__A[0] == __B[0]); 5830b57cec5SDimitry Andric } 5840b57cec5SDimitry Andric 58581ad6265SDimitry Andric extern __inline int 58681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comilt_sd(__m128d __A,__m128d __B)58781ad6265SDimitry Andric _mm_comilt_sd(__m128d __A, __m128d __B) { 5880b57cec5SDimitry Andric return (__A[0] < __B[0]); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 59181ad6265SDimitry Andric extern __inline int 59281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comile_sd(__m128d __A,__m128d __B)59381ad6265SDimitry Andric _mm_comile_sd(__m128d __A, __m128d __B) { 5940b57cec5SDimitry Andric return (__A[0] <= __B[0]); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric 59781ad6265SDimitry Andric extern __inline int 59881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comigt_sd(__m128d __A,__m128d __B)59981ad6265SDimitry Andric _mm_comigt_sd(__m128d __A, __m128d __B) { 6000b57cec5SDimitry Andric return (__A[0] > __B[0]); 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric 60381ad6265SDimitry Andric extern __inline int 60481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_comige_sd(__m128d __A,__m128d __B)60581ad6265SDimitry Andric _mm_comige_sd(__m128d __A, __m128d __B) { 6060b57cec5SDimitry Andric return (__A[0] >= __B[0]); 6070b57cec5SDimitry Andric } 6080b57cec5SDimitry Andric 60981ad6265SDimitry Andric extern __inline int 61081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comineq_sd(__m128d __A,__m128d __B)61181ad6265SDimitry Andric _mm_comineq_sd(__m128d __A, __m128d __B) { 6120b57cec5SDimitry Andric return (__A[0] != __B[0]); 6130b57cec5SDimitry Andric } 6140b57cec5SDimitry Andric 61581ad6265SDimitry Andric extern __inline int 61681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomieq_sd(__m128d __A,__m128d __B)61781ad6265SDimitry Andric _mm_ucomieq_sd(__m128d __A, __m128d __B) { 6180b57cec5SDimitry Andric return (__A[0] == __B[0]); 6190b57cec5SDimitry Andric } 6200b57cec5SDimitry Andric 62181ad6265SDimitry Andric extern __inline int 62281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomilt_sd(__m128d __A,__m128d __B)62381ad6265SDimitry Andric _mm_ucomilt_sd(__m128d __A, __m128d __B) { 6240b57cec5SDimitry Andric return (__A[0] < __B[0]); 6250b57cec5SDimitry Andric } 6260b57cec5SDimitry Andric 62781ad6265SDimitry Andric extern __inline int 62881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomile_sd(__m128d __A,__m128d __B)62981ad6265SDimitry Andric _mm_ucomile_sd(__m128d __A, __m128d __B) { 6300b57cec5SDimitry Andric return (__A[0] <= __B[0]); 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 63381ad6265SDimitry Andric extern __inline int 63481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomigt_sd(__m128d __A,__m128d __B)63581ad6265SDimitry Andric _mm_ucomigt_sd(__m128d __A, __m128d __B) { 6360b57cec5SDimitry Andric return (__A[0] > __B[0]); 6370b57cec5SDimitry Andric } 
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_ucomige_sd(__m128d __A, __m128d __B) {
  return (__A[0] >= __B[0]);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_ucomineq_sd(__m128d __A, __m128d __B) {
  return (__A[0] != __B[0]);
}

/* Create a vector of Qi, where i is the element number.  Arguments are
   given most-significant-element first; the vector literal initializer
   is least-significant-element first, hence the reversed order.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set_epi64x(long long __q1, long long __q0) {
  return __extension__(__m128i)(__v2di){__q0, __q1};
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set_epi64(__m64 __q1, __m64 __q0) {
  /* __m64 is bit-for-bit a 64-bit integer here; delegate to the x form.  */
  return _mm_set_epi64x((long long)__q1, (long long)__q0);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set_epi32(int __q3, int __q2, int __q1, int __q0) {
  return __extension__(__m128i)(__v4si){__q0, __q1, __q2, __q3};
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set_epi16(short __q7, short __q6, short __q5, short __q4, short __q3,
                  short __q2, short __q1, short __q0) {
  return __extension__(__m128i)(__v8hi){__q0, __q1, __q2, __q3,
                                        __q4, __q5, __q6, __q7};
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set_epi8(char __q15, char __q14, char __q13, char __q12, char __q11,
                 char __q10, char __q09, char __q08, char __q07, char __q06,
                 char __q05, char __q04, char __q03, char __q02, char __q01,
                 char __q00) {
  return __extension__(__m128i)(__v16qi){
      __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
      __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15};
}

/* Set all of the elements of the vector to A.  */
/* Broadcast forms: replicate the single argument into every element.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set1_epi64x(long long __A) {
  return _mm_set_epi64x(__A, __A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set1_epi64(__m64 __A) {
  return _mm_set_epi64(__A, __A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set1_epi32(int __A) {
  return _mm_set_epi32(__A, __A, __A, __A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set1_epi16(short __A) {
  return _mm_set_epi16(__A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_set1_epi8(char __A) {
  return _mm_set_epi8(__A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A,
                      __A, __A, __A, __A, __A);
}

/* Create a vector of Qi, where i is the element number.
   The parameter order is reversed from the _mm_set_epi* functions.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_setr_epi64(__m64 __q0, __m64 __q1) {
  return _mm_set_epi64(__q1, __q0);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_setr_epi32(int __q0, int __q1, int __q2, int __q3) {
  return _mm_set_epi32(__q3, __q2, __q1, __q0);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_setr_epi16(short __q0, short __q1, short __q2, short __q3, short __q4,
                   short __q5, short __q6, short __q7) {
  return _mm_set_epi16(__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_setr_epi8(char __q00, char __q01, char __q02, char __q03, char __q04,
                  char __q05, char __q06, char __q07, char __q08, char __q09,
                  char __q10, char __q11, char __q12, char __q13, char __q14,
                  char __q15) {
  return _mm_set_epi8(__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
                      __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
}

/* Create a vector with element 0 as *P and the rest zero.  */
/* Aligned 128-bit load: __P must be 16-byte aligned.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_load_si128(__m128i const *__P) {
  return *__P;
}

/* Unaligned 128-bit load via vec_vsx_ld, which has no alignment
   requirement.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loadu_si128(__m128i_u const *__P) {
  return (__m128i)(vec_vsx_ld(0, (signed int const *)__P));
}

/* Load 64 bits into element [0], zeroing the upper half.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loadl_epi64(__m128i_u const *__P) {
  return _mm_set_epi64((__m64)0LL, *(__m64 *)__P);
}

/* Aligned 128-bit store: __P must be 16-byte aligned (vec_st truncates
   the address to an aligned boundary).  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_store_si128(__m128i *__P, __m128i __B) {
  vec_st((__v16qu)__B, 0, (__v16qu *)__P);
}

/* Unaligned store: __m128i_u carries relaxed alignment, so a plain
   assignment emits an unaligned-capable store.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_storeu_si128(__m128i_u *__P, __m128i __B) {
  *__P = __B;
}

/* Store only the low 64 bits of __B.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_storel_epi64(__m128i_u *__P, __m128i __B) {
  *(long long *)__P = ((__v2di)__B)[0];
}

/* Extract the low 64 bits as an __m64.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movepi64_pi64(__m128i_u __B) {
  return (__m64)((__v2di)__B)[0];
}

/* Widen an __m64 into element [0] of an __m128i, upper half zeroed.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movpi64_epi64(__m64 __A) {
  return _mm_set_epi64((__m64)0LL, __A);
}

/* Copy the low 64 bits of __A, zeroing the upper half.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_move_epi64(__m128i __A) {
  return _mm_set_epi64((__m64)0LL, (__m64)__A[0]);
}

/* Create an undefined vector.  */
/* The self-initialization below only silences uninitialized-use
   warnings; the contents are unspecified by contract.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_undefined_si128(void) {
  __m128i __Y = __Y;
  return __Y;
}

/* Create a vector of zeros.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_setzero_si128(void) {
  return __extension__(__m128i)(__v4si){0, 0, 0, 0};
}

#ifdef _ARCH_PWR8
/* Convert the two low signed 32-bit elements of __A to doubles.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_pd(__m128i __A) {
  __v2di __val;
  /* For LE need to generate Vector Unpack Low Signed Word.
     Which is generated from unpackh.  */
  __val = (__v2di)vec_unpackh((__v4si)__A);

  return (__m128d)vec_ctf(__val, 0);
}
#endif

/* Convert four signed 32-bit integers to four floats.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_ps(__m128i __A) {
  return ((__m128)vec_ctf((__v4si)__A, 0));
}

/* Convert two doubles to two signed 32-bit ints (rounded to nearest per
   current mode via vec_rint), packed into the low half of the result;
   the high half is zeroed.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpd_epi32(__m128d __A) {
  __v2df __rounded = vec_rint(__A);
  __v4si __result, __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  /* VSX Vector truncate Double-Precision to integer and Convert to
     Signed Integer Word format with Saturate.  */
  __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__rounded) :);

#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  /* Pack the two converted words into the low doubleword, zeroing the
     other half.  */
  __result = (__v4si)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    /* Pre-POWER8: emulate the pack with a byte permute.  */
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif
  return (__m128i)__result;
}

/* As _mm_cvtpd_epi32, returning only the low 64 bits as an __m64.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpd_pi32(__m128d __A) {
  __m128i __result = _mm_cvtpd_epi32(__A);

  return (__m64)__result[0];
}

/* Convert two doubles to two floats in the low half of the result; the
   high half is zeroed.  __temp holds the float bit patterns as ints so
   the same merge/pack sequence as above can be reused.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpd_ps(__m128d __A) {
  __v4sf __result;
  __v4si __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  __asm__("xvcvdpsp %x0,%x1" : "=wa"(__temp) : "wa"(__A) :);

#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  __result = (__v4sf)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4sf)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif
  return ((__m128)__result);
}

/* Convert two doubles to two signed 32-bit ints with truncation (no
   vec_rint here, unlike _mm_cvtpd_epi32), packed into the low half.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttpd_epi32(__m128d __A) {
  __v4si __result;
  __v4si __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  /* VSX Vector truncate Double-Precision to integer and Convert to
     Signed Integer Word format with Saturate.  */
  __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__A) :);

#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  __result = (__v4si)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif

  return ((__m128i)__result);
}

/* As _mm_cvttpd_epi32, returning only the low 64 bits as an __m64.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttpd_pi32(__m128d __A) {
  __m128i __result = _mm_cvttpd_epi32(__A);

  return (__m64)__result[0];
}

/* Extract the low signed 32-bit element.  */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi128_si32(__m128i __A) {
  return ((__v4si)__A)[0];
}

#ifdef _ARCH_PWR8
/* Convert two packed 32-bit ints in an __m64 to two doubles.
   NOTE(review): __result is declared __v4f (vector float) yet receives
   vec_ctf of a 64-bit integer vector and is cast to __m128d; the type
   appears chosen to match vec_ctf's declared result for long long --
   confirm against <altivec.h>.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpi32_pd(__m64 __A) {
  __v4si __temp;
  __v2di __tmp2;
  __v4f __result;

  __temp = (__v4si)vec_splats(__A);
  __tmp2 = (__v2di)vec_unpackl(__temp);
  __result = vec_ctf((__vector signed long long)__tmp2, 0);
  return (__m128d)__result;
}
#endif

/* Convert four floats to four signed 32-bit ints, rounding to nearest
   per the current mode (vec_rint), saturating via vec_cts.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtps_epi32(__m128 __A) {
  __v4sf __rounded;
  __v4si __result;

  __rounded = vec_rint((__v4sf)__A);
  __result = vec_cts(__rounded, 0);
  return (__m128i)__result;
}

/* Convert four floats to four signed 32-bit ints with truncation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttps_epi32(__m128 __A) {
  __v4si __result;

  __result = vec_cts((__v4sf)__A, 0);
  return (__m128i)__result;
}

/* Convert the two low floats of __A to two doubles.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtps_pd(__m128 __A) {
  /* Check if vec_doubleh is defined by <altivec.h>.  If so use that.  */
#ifdef vec_doubleh
  return (__m128d)vec_doubleh((__v4sf)__A);
#else
  /* Otherwise the compiler is not current and so need to generate the
     equivalent code.  */
  __v4sf __a = (__v4sf)__A;
  __v4sf __temp;
  __v2df __result;
#ifdef __LITTLE_ENDIAN__
  /* The input float values are in elements {[0], [1]} but the convert
     instruction needs them in elements {[1], [3]}, So we use two
     shift left double vector word immediates to get the elements
     lined up.  */
  __temp = __builtin_vsx_xxsldwi(__a, __a, 3);
  __temp = __builtin_vsx_xxsldwi(__a, __temp, 2);
#else
  /* The input float values are in elements {[0], [1]} but the convert
     instruction needs them in elements {[0], [2]}; merging each word
     with itself lines them up.  */
  __temp = vec_vmrghw(__a, __a);
#endif
  __asm__(" xvcvspdp %x0,%x1" : "=wa"(__result) : "wa"(__temp) :);
  return (__m128d)__result;
#endif
}

/* Convert the low double to a signed 32-bit int, rounding to nearest
   per the current mode; the int conversion of the already-integral
   value is exact.  */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsd_si32(__m128d __A) {
  __v2df __rounded = vec_rint((__v2df)__A);
  int __result = ((__v2df)__rounded)[0];

  return __result;
}
/* Intel intrinsic.  */
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsd_si64(__m128d __A) {
  __v2df __rounded = vec_rint((__v2df)__A);
  long long __result = ((__v2df)__rounded)[0];

  return __result;
}

/* Microsoft intrinsic.  */
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsd_si64x(__m128d __A) {
  return _mm_cvtsd_si64((__v2df)__A);
}

/* Convert the low double to int with C truncation semantics.  */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttsd_si32(__m128d __A) {
  int __result = ((__v2df)__A)[0];

  return __result;
}

/* Intel intrinsic.  */
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttsd_si64(__m128d __A) {
  long long __result = ((__v2df)__A)[0];

  return __result;
}

/* Microsoft intrinsic.  */
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttsd_si64x(__m128d __A) {
  return _mm_cvttsd_si64(__A);
}

/* Convert the low double of __B to float and insert it into element [0]
   of __A; the other three floats of __A pass through unchanged.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsd_ss(__m128 __A, __m128d __B) {
  __v4sf __result = (__v4sf)__A;

#ifdef __LITTLE_ENDIAN__
  __v4sf __temp_s;
  /* Copy double element[0] to element [1] for conversion.  */
  __v2df __temp_b = vec_splat((__v2df)__B, 0);

  /* Pre-rotate __A left 3 (logically right 1) elements.  */
  __result = __builtin_vsx_xxsldwi(__result, __result, 3);
  /* Convert double to single float scalar in a vector.  */
  __asm__("xscvdpsp %x0,%x1" : "=wa"(__temp_s) : "wa"(__temp_b) :);
  /* Shift the resulting scalar into vector element [0].  */
  __result = __builtin_vsx_xxsldwi(__result, __temp_s, 1);
#else
  __result[0] = ((__v2df)__B)[0];
#endif
  return (__m128)__result;
}

/* Replace element [0] of __A with __B converted to double.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi32_sd(__m128d __A, int __B) {
  __v2df __result = (__v2df)__A;
  double __db = __B;
  __result[0] = __db;
  return (__m128d)__result;
}

/* Intel intrinsic.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi64_sd(__m128d __A, long long __B) {
  __v2df __result = (__v2df)__A;
  double __db = __B;
  __result[0] = __db;
  return (__m128d)__result;
}

/* Microsoft intrinsic.
 */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi64x_sd(__m128d __A, long long __B) {
  return _mm_cvtsi64_sd(__A, __B);
}

/* Convert float element [0] of __B to double and insert it as double
   element [0] of __A; element [1] of __A is preserved.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtss_sd(__m128d __A, __m128 __B) {
#ifdef __LITTLE_ENDIAN__
  /* Use splat to move element [0] into position for the convert. */
  __v4sf __temp = vec_splat((__v4sf)__B, 0);
  __v2df __res;
  /* Convert single float scalar to double in a vector. */
  __asm__("xscvspdp %x0,%x1" : "=wa"(__res) : "wa"(__temp) :);
  return (__m128d)vec_mergel(__res, (__v2df)__A);
#else
  __v2df __res = (__v2df)__A;
  __res[0] = ((__v4sf)__B)[0];
  return (__m128d)__res;
#endif
}

/* Select one double from __A (mask bit 0) and one from __B (mask bit 1).
   GCC < 6 used a different operand convention for vec_xxpermdi, hence
   the two variants below.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) {
  __vector double __result;
  /* Only the low 2 bits of the mask are significant for 2 doubles.  */
  const int __litmsk = __mask & 0x3;

  if (__litmsk == 0)
    __result = vec_mergeh(__A, __B);
#if __GNUC__ < 6
  else if (__litmsk == 1)
    __result = vec_xxpermdi(__B, __A, 2);
  else if (__litmsk == 2)
    __result = vec_xxpermdi(__B, __A, 1);
#else
  else if (__litmsk == 1)
    __result = vec_xxpermdi(__A, __B, 2);
  else if (__litmsk == 2)
    __result = vec_xxpermdi(__A, __B, 1);
#endif
  else
    __result = vec_mergel(__A, __B);

  return __result;
}

/* Note: Intel "unpackhi" maps to vec_mergel (and "unpacklo" to
   vec_mergeh) because the two ISAs number vector elements from
   opposite ends.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_pd(__m128d __A, __m128d __B) {
  return (__m128d)vec_mergel((__v2df)__A, (__v2df)__B);
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_pd(__m128d __A, __m128d __B) {
  return (__m128d)vec_mergeh((__v2df)__A, (__v2df)__B);
}

/* Load a double from *__B into element [1] of __A; element [0] is
   preserved.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loadh_pd(__m128d __A, double const *__B) {
  __v2df __result = (__v2df)__A;
  __result[1] = *__B;
  return (__m128d)__result;
}

/* Load a double from *__B into element [0] of __A; element [1] is
   preserved.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loadl_pd(__m128d __A, double const *__B) {
  __v2df __result = (__v2df)__A;
  __result[0] = *__B;
  return (__m128d)__result;
}

#ifdef _ARCH_PWR8
/* Intrinsic functions that require PowerISA 2.07 minimum. */

/* Creates a 2-bit mask from the most significant bits of the DPFP values. */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movemask_pd(__m128d __A) {
#ifdef _ARCH_PWR10
  /* ISA 3.1 provides a direct extract-mask instruction.  */
  return vec_extractm((__v2du)__A);
#else
  __vector unsigned long long __result;
  /* vbpermq selector: entries 0x00/0x40 pick the two sign bits of the
     128-bit value; 0x80 entries select a zero bit.  */
  static const __vector unsigned int __perm_mask = {
#ifdef __LITTLE_ENDIAN__
      0x80800040, 0x80808080, 0x80808080, 0x80808080
#else
      0x80808080, 0x80808080, 0x80808080, 0x80804000
#endif
  };

  __result = ((__vector unsigned long long)vec_vbpermq(
      (__vector unsigned char)__A, (__vector unsigned char)__perm_mask));

  /* The gathered mask lands in a different doubleword per endianness.  */
#ifdef __LITTLE_ENDIAN__
  return __result[1];
#else
  return __result[0];
#endif
#endif /* !_ARCH_PWR10 */
}
#endif /* _ARCH_PWR8 */

/* Pack 16-bit elements to 8-bit with signed saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packs_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_packs((__v8hi)__A, (__v8hi)__B);
}

/* Pack 32-bit elements to 16-bit with signed saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packs_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_packs((__v4si)__A, (__v4si)__B);
}

/* Pack signed 16-bit elements to 8-bit with unsigned saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_packsu((__v8hi)__A, (__v8hi)__B);
}

/* Interleave high halves.  Intel "unpackhi" maps to vec_mergel
   (opposite element-numbering conventions).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v16qu)__A, (__v16qu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v8hu)__A, (__v8hu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v4su)__A, (__v4su)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi64(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__vector long long)__A, (__vector long long)__B);
}

/* Interleave low halves: Intel "unpacklo" maps to vec_mergeh.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v16qu)__A, (__v16qu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi64(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__vector long long)__A, (__vector long long)__B);
}

/* Element-wise modulo (wrapping) adds.  The arithmetic is done on
   unsigned vector types so wraparound is well-defined in C.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi8(__m128i __A, __m128i __B) {
  return (__m128i)((__v16qu)__A + (__v16qu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi16(__m128i __A, __m128i __B) {
  return (__m128i)((__v8hu)__A + (__v8hu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi32(__m128i __A, __m128i __B) {
  return (__m128i)((__v4su)__A + (__v4su)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi64(__m128i __A, __m128i __B) {
  return (__m128i)((__v2du)__A + (__v2du)__B);
}

/* Element-wise saturating adds, signed and unsigned.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epu8(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v16qu)__A, (__v16qu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epu16(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v8hu)__A, (__v8hu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi8(__m128i __A, __m128i __B) {
  return (__m128i)((__v16qu)__A - (__v16qu)__B);
}

/* Element-wise modulo (wrapping) subtracts; unsigned arithmetic keeps
   the wraparound well-defined in C.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi16(__m128i __A, __m128i __B) {
  return (__m128i)((__v8hu)__A - (__v8hu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi32(__m128i __A, __m128i __B) {
  return (__m128i)((__v4su)__A - (__v4su)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi64(__m128i __A, __m128i __B) {
  return (__m128i)((__v2du)__A - (__v2du)__B);
}

/* Element-wise saturating subtracts, signed and unsigned.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epu8(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v16qu)__A, (__v16qu)__B);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epu16(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v8hu)__A, (__v8hu)__B);
}

/* pmaddwd: multiply signed 16-bit pairs, then add adjacent 32-bit
   products.  vmsumshm with a zero accumulator does this directly.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_madd_epi16(__m128i __A, __m128i __B) {
  __vector signed int __zero = {0, 0, 0, 0};

  return (__m128i)vec_vmsumshm((__v8hi)__A, (__v8hi)__B, __zero);
}

/* High 16 bits of each signed 16x16->32 product: multiply the
   even/odd element pairs, then permute the high halves back into
   original element order.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhi_epi16(__m128i __A, __m128i __B) {
  __vector signed int __w0, __w1;

  /* Byte selectors picking the high half of each 32-bit product from
     the even (__w0) and odd (__w1) multiply results.  */
  __vector unsigned char __xform1 = {
#ifdef __LITTLE_ENDIAN__
      0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
      0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
#else
      0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08,
      0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
#endif
  };

  __w0 = vec_vmulesh((__v8hi)__A, (__v8hi)__B);
  __w1 = vec_vmulosh((__v8hi)__A, (__v8hi)__B);
  return (__m128i)vec_perm(__w0, __w1, __xform1);
}

/* Low 16 bits of each 16x16 product.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi16(__m128i __A, __m128i __B) {
  return (__m128i)((__v8hi)__A * (__v8hi)__B);
}

/* Multiply the low 32 bits of each __m64 operand as unsigned values,
   producing a 64-bit product.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_su32(__m64 __A, __m64 __B) {
  unsigned int __a = __A;
  unsigned int __b = __B;

  return ((__m64)__a * (__m64)__b);
}

#ifdef _ARCH_PWR8
/* pmuludq: unsigned 32x32->64 multiply of elements 0 and 2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epu32(__m128i __A, __m128i __B) {
#if __GNUC__ < 8
  __v2du __result;

#ifdef __LITTLE_ENDIAN__
  /* VMX Vector Multiply Odd Unsigned Word. */
  __asm__("vmulouw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :);
#else
  /* VMX Vector Multiply Even Unsigned Word. */
  __asm__("vmuleuw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :);
#endif
  return (__m128i)__result;
#else
  /* GCC 8+ vec_mule handles the endian adjustment itself.  */
  return (__m128i)vec_mule((__v4su)__A, (__v4su)__B);
#endif
}
#endif

/* Shift each 16-bit element left by __B bits; counts outside 0..15
   yield zero, per Intel semantics.  Constant small counts use the
   splat-immediate form to avoid loading the count into a VR.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi16(__m128i __A, int __B) {
  __v8hu __lshift;
  __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};

  if (__B >= 0 && __B < 16) {
    if (__builtin_constant_p(__B))
      __lshift = (__v8hu)vec_splat_s16(__B);
    else
      __lshift = vec_splats((unsigned short)__B);

    __result = vec_sl((__v8hi)__A, __lshift);
  }

  return (__m128i)__result;
}

/* As above for 32-bit elements (valid counts 0..31).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi32(__m128i __A, int __B) {
  __v4su __lshift;
  __v4si __result = {0, 0, 0, 0};

  if (__B >= 0 && __B < 32) {
    /* vec_splat_s32 immediates are limited to -16..15.  */
    if (__builtin_constant_p(__B) && __B < 16)
      __lshift = (__v4su)vec_splat_s32(__B);
    else
      __lshift = vec_splats((unsigned int)__B);

    __result = vec_sl((__v4si)__A, __lshift);
  }

  return (__m128i)__result;
}

#ifdef _ARCH_PWR8
/* As above for 64-bit elements (valid counts 0..63); the doubleword
   shift requires PowerISA 2.07.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi64(__m128i __A, int __B) {
  __v2du __lshift;
  __v2di __result = {0, 0};

  if (__B >= 0 && __B < 64) {
    if (__builtin_constant_p(__B) && __B < 16)
      __lshift = (__v2du)vec_splat_s32(__B);
    else
      __lshift = (__v2du)vec_splats((unsigned int)__B);

    __result = vec_sl((__v2di)__A, __lshift);
  }

  return (__m128i)__result;
}
#endif

/* Arithmetic right shift of 16-bit elements.  Counts >= 16 use the
   default __rshift of 15, which replicates the sign bit across the
   element, per Intel semantics.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srai_epi16(__m128i __A, int __B) {
  __v8hu __rshift = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hi __result;

  if (__B < 16) {
    if (__builtin_constant_p(__B))
      __rshift = (__v8hu)vec_splat_s16(__B);
    else
      __rshift = vec_splats((unsigned short)__B);
  }
  __result = vec_sra((__v8hi)__A, __rshift);

  return (__m128i)__result;
}

/* As above for 32-bit elements; counts >= 32 use the default count
   of 31.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srai_epi32(__m128i __A, int __B) {
  __v4su __rshift = {31, 31, 31, 31};
  __v4si __result;

  if (__B < 32) {
    if (__builtin_constant_p(__B)) {
      if (__B < 16)
        __rshift = (__v4su)vec_splat_s32(__B);
      else
        __rshift = (__v4su)vec_splats((unsigned int)__B);
    } else
      __rshift = vec_splats((unsigned int)__B);
  }
  __result = vec_sra((__v4si)__A, __rshift);

  return (__m128i)__result;
}

/* Shift the whole 128-bit value left by __N bytes; __N >= 16 yields
   zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_bslli_si128(__m128i __A, const int __N) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (__N < 16)
    __result = vec_sld((__v16qu)__A, __zeros, __N);
  else
    __result = __zeros;

  return (__m128i)__result;
}

/* Shift the whole 128-bit value right by __N bytes; __N >= 16 yields
   zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_bsrli_si128(__m128i __A, const int __N) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (__N < 16)
#ifdef __LITTLE_ENDIAN__
    if (__builtin_constant_p(__N))
      /* Would like to use Vector Shift Left Double by Octet
         Immediate here to use the immediate form and avoid
         load of __N * 8 value into a separate VR.
 */
      __result = vec_sld(__zeros, (__v16qu)__A, (16 - __N));
    else
#endif
    {
      /* Variable count: vec_sro/vec_slo take the shift in bits
         (__N * 8) splatted into a vector.  */
      __v16qu __shift = vec_splats((unsigned char)(__N * 8));
#ifdef __LITTLE_ENDIAN__
      __result = vec_sro((__v16qu)__A, __shift);
#else
      __result = vec_slo((__v16qu)__A, __shift);
#endif
    }
  else
    __result = __zeros;

  return (__m128i)__result;
}

/* Alias for _mm_bsrli_si128.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_si128(__m128i __A, const int __N) {
  return _mm_bsrli_si128(__A, __N);
}

/* Shift the whole 128-bit value left by _imm5 bytes; _imm5 >= 16
   yields zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_si128(__m128i __A, const int _imm5) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (_imm5 < 16)
#ifdef __LITTLE_ENDIAN__
    __result = vec_sld((__v16qu)__A, __zeros, _imm5);
#else
    __result = vec_sld(__zeros, (__v16qu)__A, (16 - _imm5));
#endif
  else
    __result = __zeros;

  return (__m128i)__result;
}

/* Logical right shift of 16-bit elements; counts >= 16 yield zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_epi16(__m128i __A, int __B) {
  __v8hu __rshift;
  __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};

  if (__B < 16) {
    if (__builtin_constant_p(__B))
      __rshift = (__v8hu)vec_splat_s16(__B);
    else
      __rshift = vec_splats((unsigned short)__B);

    __result = vec_sr((__v8hi)__A, __rshift);
  }

  return (__m128i)__result;
}

/* Logical right shift of 32-bit elements; counts >= 32 yield zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_epi32(__m128i __A, int __B) {
  __v4su __rshift;
  __v4si __result = {0, 0, 0, 0};

  if (__B < 32) {
    /* vec_splat_s32 immediates are limited to -16..15.  */
    if (__builtin_constant_p(__B)) {
      if (__B < 16)
        __rshift = (__v4su)vec_splat_s32(__B);
      else
        __rshift = (__v4su)vec_splats((unsigned int)__B);
    } else
      __rshift = vec_splats((unsigned int)__B);

    __result = vec_sr((__v4si)__A, __rshift);
  }

  return (__m128i)__result;
}

#ifdef _ARCH_PWR8
/* Logical right shift of 64-bit elements; counts >= 64 yield zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_epi64(__m128i __A, int __B) {
  __v2du __rshift;
  __v2di __result = {0, 0};

  if (__B < 64) {
    if (__builtin_constant_p(__B)) {
      if (__B < 16)
        __rshift = (__v2du)vec_splat_s32(__B);
      else
        __rshift = (__v2du)vec_splats((unsigned long long)__B);
    } else
      __rshift = (__v2du)vec_splats((unsigned int)__B);

    __result = vec_sr((__v2di)__A, __rshift);
  }

  return (__m128i)__result;
}
#endif

/* Shift 16-bit elements left by a per-call count taken from __B.
   __shmask/__shmax bound the count at 15 — body continues below.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sll_epi16(__m128i __A, __m128i __B) {
  __v8hu __lshift;
  __vector __bool short __shmask;
  const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hu __result;

#ifdef __LITTLE_ENDIAN__
  __lshift =
vec_splat((__v8hu)__B, 0); 16720b57cec5SDimitry Andric #else 167381ad6265SDimitry Andric __lshift = vec_splat((__v8hu)__B, 3); 16740b57cec5SDimitry Andric #endif 167581ad6265SDimitry Andric __shmask = vec_cmple(__lshift, __shmax); 167681ad6265SDimitry Andric __result = vec_sl((__v8hu)__A, __lshift); 167781ad6265SDimitry Andric __result = vec_sel((__v8hu)__shmask, __result, __shmask); 16780b57cec5SDimitry Andric 167981ad6265SDimitry Andric return (__m128i)__result; 16800b57cec5SDimitry Andric } 16810b57cec5SDimitry Andric 168281ad6265SDimitry Andric extern __inline __m128i 168381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi32(__m128i __A,__m128i __B)168481ad6265SDimitry Andric _mm_sll_epi32(__m128i __A, __m128i __B) { 168581ad6265SDimitry Andric __v4su __lshift; 168681ad6265SDimitry Andric __vector __bool int __shmask; 168781ad6265SDimitry Andric const __v4su __shmax = {32, 32, 32, 32}; 168881ad6265SDimitry Andric __v4su __result; 16890b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 169081ad6265SDimitry Andric __lshift = vec_splat((__v4su)__B, 0); 16910b57cec5SDimitry Andric #else 169281ad6265SDimitry Andric __lshift = vec_splat((__v4su)__B, 1); 16930b57cec5SDimitry Andric #endif 169481ad6265SDimitry Andric __shmask = vec_cmplt(__lshift, __shmax); 169581ad6265SDimitry Andric __result = vec_sl((__v4su)__A, __lshift); 169681ad6265SDimitry Andric __result = vec_sel((__v4su)__shmask, __result, __shmask); 16970b57cec5SDimitry Andric 169881ad6265SDimitry Andric return (__m128i)__result; 16990b57cec5SDimitry Andric } 17000b57cec5SDimitry Andric 17010b57cec5SDimitry Andric #ifdef _ARCH_PWR8 170281ad6265SDimitry Andric extern __inline __m128i 170381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi64(__m128i __A,__m128i __B)170481ad6265SDimitry Andric _mm_sll_epi64(__m128i __A, __m128i __B) { 170581ad6265SDimitry Andric __v2du __lshift; 170681ad6265SDimitry Andric 
__vector __bool long long __shmask; 170781ad6265SDimitry Andric const __v2du __shmax = {64, 64}; 170881ad6265SDimitry Andric __v2du __result; 17090b57cec5SDimitry Andric 171081ad6265SDimitry Andric __lshift = vec_splat((__v2du)__B, 0); 171181ad6265SDimitry Andric __shmask = vec_cmplt(__lshift, __shmax); 171281ad6265SDimitry Andric __result = vec_sl((__v2du)__A, __lshift); 171381ad6265SDimitry Andric __result = vec_sel((__v2du)__shmask, __result, __shmask); 17140b57cec5SDimitry Andric 171581ad6265SDimitry Andric return (__m128i)__result; 17160b57cec5SDimitry Andric } 17170b57cec5SDimitry Andric #endif 17180b57cec5SDimitry Andric 171981ad6265SDimitry Andric extern __inline __m128i 172081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi16(__m128i __A,__m128i __B)172181ad6265SDimitry Andric _mm_sra_epi16(__m128i __A, __m128i __B) { 172281ad6265SDimitry Andric const __v8hu __rshmax = {15, 15, 15, 15, 15, 15, 15, 15}; 172381ad6265SDimitry Andric __v8hu __rshift; 172481ad6265SDimitry Andric __v8hi __result; 17250b57cec5SDimitry Andric 17260b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 172781ad6265SDimitry Andric __rshift = vec_splat((__v8hu)__B, 0); 17280b57cec5SDimitry Andric #else 172981ad6265SDimitry Andric __rshift = vec_splat((__v8hu)__B, 3); 17300b57cec5SDimitry Andric #endif 173181ad6265SDimitry Andric __rshift = vec_min(__rshift, __rshmax); 173281ad6265SDimitry Andric __result = vec_sra((__v8hi)__A, __rshift); 17330b57cec5SDimitry Andric 173481ad6265SDimitry Andric return (__m128i)__result; 17350b57cec5SDimitry Andric } 17360b57cec5SDimitry Andric 173781ad6265SDimitry Andric extern __inline __m128i 173881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi32(__m128i __A,__m128i __B)173981ad6265SDimitry Andric _mm_sra_epi32(__m128i __A, __m128i __B) { 174081ad6265SDimitry Andric const __v4su __rshmax = {31, 31, 31, 31}; 174181ad6265SDimitry Andric __v4su __rshift; 
174281ad6265SDimitry Andric __v4si __result; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 174581ad6265SDimitry Andric __rshift = vec_splat((__v4su)__B, 0); 17460b57cec5SDimitry Andric #else 174781ad6265SDimitry Andric __rshift = vec_splat((__v4su)__B, 1); 17480b57cec5SDimitry Andric #endif 174981ad6265SDimitry Andric __rshift = vec_min(__rshift, __rshmax); 175081ad6265SDimitry Andric __result = vec_sra((__v4si)__A, __rshift); 17510b57cec5SDimitry Andric 175281ad6265SDimitry Andric return (__m128i)__result; 17530b57cec5SDimitry Andric } 17540b57cec5SDimitry Andric 175581ad6265SDimitry Andric extern __inline __m128i 175681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi16(__m128i __A,__m128i __B)175781ad6265SDimitry Andric _mm_srl_epi16(__m128i __A, __m128i __B) { 175881ad6265SDimitry Andric __v8hu __rshift; 175981ad6265SDimitry Andric __vector __bool short __shmask; 176081ad6265SDimitry Andric const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15}; 176181ad6265SDimitry Andric __v8hu __result; 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 176481ad6265SDimitry Andric __rshift = vec_splat((__v8hu)__B, 0); 17650b57cec5SDimitry Andric #else 176681ad6265SDimitry Andric __rshift = vec_splat((__v8hu)__B, 3); 17670b57cec5SDimitry Andric #endif 176881ad6265SDimitry Andric __shmask = vec_cmple(__rshift, __shmax); 176981ad6265SDimitry Andric __result = vec_sr((__v8hu)__A, __rshift); 177081ad6265SDimitry Andric __result = vec_sel((__v8hu)__shmask, __result, __shmask); 17710b57cec5SDimitry Andric 177281ad6265SDimitry Andric return (__m128i)__result; 17730b57cec5SDimitry Andric } 17740b57cec5SDimitry Andric 177581ad6265SDimitry Andric extern __inline __m128i 177681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi32(__m128i __A,__m128i __B)177781ad6265SDimitry Andric _mm_srl_epi32(__m128i __A, __m128i __B) { 
177881ad6265SDimitry Andric __v4su __rshift; 177981ad6265SDimitry Andric __vector __bool int __shmask; 178081ad6265SDimitry Andric const __v4su __shmax = {32, 32, 32, 32}; 178181ad6265SDimitry Andric __v4su __result; 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 178481ad6265SDimitry Andric __rshift = vec_splat((__v4su)__B, 0); 17850b57cec5SDimitry Andric #else 178681ad6265SDimitry Andric __rshift = vec_splat((__v4su)__B, 1); 17870b57cec5SDimitry Andric #endif 178881ad6265SDimitry Andric __shmask = vec_cmplt(__rshift, __shmax); 178981ad6265SDimitry Andric __result = vec_sr((__v4su)__A, __rshift); 179081ad6265SDimitry Andric __result = vec_sel((__v4su)__shmask, __result, __shmask); 17910b57cec5SDimitry Andric 179281ad6265SDimitry Andric return (__m128i)__result; 17930b57cec5SDimitry Andric } 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric #ifdef _ARCH_PWR8 179681ad6265SDimitry Andric extern __inline __m128i 179781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi64(__m128i __A,__m128i __B)179881ad6265SDimitry Andric _mm_srl_epi64(__m128i __A, __m128i __B) { 179981ad6265SDimitry Andric __v2du __rshift; 180081ad6265SDimitry Andric __vector __bool long long __shmask; 180181ad6265SDimitry Andric const __v2du __shmax = {64, 64}; 180281ad6265SDimitry Andric __v2du __result; 18030b57cec5SDimitry Andric 180481ad6265SDimitry Andric __rshift = vec_splat((__v2du)__B, 0); 180581ad6265SDimitry Andric __shmask = vec_cmplt(__rshift, __shmax); 180681ad6265SDimitry Andric __result = vec_sr((__v2du)__A, __rshift); 180781ad6265SDimitry Andric __result = vec_sel((__v2du)__shmask, __result, __shmask); 18080b57cec5SDimitry Andric 180981ad6265SDimitry Andric return (__m128i)__result; 18100b57cec5SDimitry Andric } 18110b57cec5SDimitry Andric #endif 18120b57cec5SDimitry Andric 181381ad6265SDimitry Andric extern __inline __m128d 181481ad6265SDimitry Andric __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_and_pd(__m128d __A,__m128d __B)181581ad6265SDimitry Andric _mm_and_pd(__m128d __A, __m128d __B) { 18160b57cec5SDimitry Andric return (vec_and((__v2df)__A, (__v2df)__B)); 18170b57cec5SDimitry Andric } 18180b57cec5SDimitry Andric 181981ad6265SDimitry Andric extern __inline __m128d 182081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_pd(__m128d __A,__m128d __B)182181ad6265SDimitry Andric _mm_andnot_pd(__m128d __A, __m128d __B) { 18220b57cec5SDimitry Andric return (vec_andc((__v2df)__B, (__v2df)__A)); 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric 182581ad6265SDimitry Andric extern __inline __m128d 182681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_pd(__m128d __A,__m128d __B)182781ad6265SDimitry Andric _mm_or_pd(__m128d __A, __m128d __B) { 18280b57cec5SDimitry Andric return (vec_or((__v2df)__A, (__v2df)__B)); 18290b57cec5SDimitry Andric } 18300b57cec5SDimitry Andric 183181ad6265SDimitry Andric extern __inline __m128d 183281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_pd(__m128d __A,__m128d __B)183381ad6265SDimitry Andric _mm_xor_pd(__m128d __A, __m128d __B) { 18340b57cec5SDimitry Andric return (vec_xor((__v2df)__A, (__v2df)__B)); 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric 183781ad6265SDimitry Andric extern __inline __m128i 183881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si128(__m128i __A,__m128i __B)183981ad6265SDimitry Andric _mm_and_si128(__m128i __A, __m128i __B) { 18400b57cec5SDimitry Andric return (__m128i)vec_and((__v2di)__A, (__v2di)__B); 18410b57cec5SDimitry Andric } 18420b57cec5SDimitry Andric 184381ad6265SDimitry Andric extern __inline __m128i 184481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_si128(__m128i __A,__m128i 
__B)184581ad6265SDimitry Andric _mm_andnot_si128(__m128i __A, __m128i __B) { 18460b57cec5SDimitry Andric return (__m128i)vec_andc((__v2di)__B, (__v2di)__A); 18470b57cec5SDimitry Andric } 18480b57cec5SDimitry Andric 184981ad6265SDimitry Andric extern __inline __m128i 185081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si128(__m128i __A,__m128i __B)185181ad6265SDimitry Andric _mm_or_si128(__m128i __A, __m128i __B) { 18520b57cec5SDimitry Andric return (__m128i)vec_or((__v2di)__A, (__v2di)__B); 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric 185581ad6265SDimitry Andric extern __inline __m128i 185681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si128(__m128i __A,__m128i __B)185781ad6265SDimitry Andric _mm_xor_si128(__m128i __A, __m128i __B) { 18580b57cec5SDimitry Andric return (__m128i)vec_xor((__v2di)__A, (__v2di)__B); 18590b57cec5SDimitry Andric } 18600b57cec5SDimitry Andric 186181ad6265SDimitry Andric extern __inline __m128i 186281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8(__m128i __A,__m128i __B)186381ad6265SDimitry Andric _mm_cmpeq_epi8(__m128i __A, __m128i __B) { 18640b57cec5SDimitry Andric return (__m128i)vec_cmpeq((__v16qi)__A, (__v16qi)__B); 18650b57cec5SDimitry Andric } 18660b57cec5SDimitry Andric 186781ad6265SDimitry Andric extern __inline __m128i 186881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi16(__m128i __A,__m128i __B)186981ad6265SDimitry Andric _mm_cmpeq_epi16(__m128i __A, __m128i __B) { 18700b57cec5SDimitry Andric return (__m128i)vec_cmpeq((__v8hi)__A, (__v8hi)__B); 18710b57cec5SDimitry Andric } 18720b57cec5SDimitry Andric 187381ad6265SDimitry Andric extern __inline __m128i 187481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi32(__m128i __A,__m128i __B)187581ad6265SDimitry 
Andric _mm_cmpeq_epi32(__m128i __A, __m128i __B) { 18760b57cec5SDimitry Andric return (__m128i)vec_cmpeq((__v4si)__A, (__v4si)__B); 18770b57cec5SDimitry Andric } 18780b57cec5SDimitry Andric 187981ad6265SDimitry Andric extern __inline __m128i 188081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi8(__m128i __A,__m128i __B)188181ad6265SDimitry Andric _mm_cmplt_epi8(__m128i __A, __m128i __B) { 18820b57cec5SDimitry Andric return (__m128i)vec_cmplt((__v16qi)__A, (__v16qi)__B); 18830b57cec5SDimitry Andric } 18840b57cec5SDimitry Andric 188581ad6265SDimitry Andric extern __inline __m128i 188681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi16(__m128i __A,__m128i __B)188781ad6265SDimitry Andric _mm_cmplt_epi16(__m128i __A, __m128i __B) { 18880b57cec5SDimitry Andric return (__m128i)vec_cmplt((__v8hi)__A, (__v8hi)__B); 18890b57cec5SDimitry Andric } 18900b57cec5SDimitry Andric 189181ad6265SDimitry Andric extern __inline __m128i 189281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi32(__m128i __A,__m128i __B)189381ad6265SDimitry Andric _mm_cmplt_epi32(__m128i __A, __m128i __B) { 18940b57cec5SDimitry Andric return (__m128i)vec_cmplt((__v4si)__A, (__v4si)__B); 18950b57cec5SDimitry Andric } 18960b57cec5SDimitry Andric 189781ad6265SDimitry Andric extern __inline __m128i 189881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi8(__m128i __A,__m128i __B)189981ad6265SDimitry Andric _mm_cmpgt_epi8(__m128i __A, __m128i __B) { 19000b57cec5SDimitry Andric return (__m128i)vec_cmpgt((__v16qi)__A, (__v16qi)__B); 19010b57cec5SDimitry Andric } 19020b57cec5SDimitry Andric 190381ad6265SDimitry Andric extern __inline __m128i 190481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi16(__m128i __A,__m128i __B)190581ad6265SDimitry Andric 
_mm_cmpgt_epi16(__m128i __A, __m128i __B) { 19060b57cec5SDimitry Andric return (__m128i)vec_cmpgt((__v8hi)__A, (__v8hi)__B); 19070b57cec5SDimitry Andric } 19080b57cec5SDimitry Andric 190981ad6265SDimitry Andric extern __inline __m128i 191081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi32(__m128i __A,__m128i __B)191181ad6265SDimitry Andric _mm_cmpgt_epi32(__m128i __A, __m128i __B) { 19120b57cec5SDimitry Andric return (__m128i)vec_cmpgt((__v4si)__A, (__v4si)__B); 19130b57cec5SDimitry Andric } 19140b57cec5SDimitry Andric 191581ad6265SDimitry Andric extern __inline int 191681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16(__m128i const __A,int const __N)191781ad6265SDimitry Andric _mm_extract_epi16(__m128i const __A, int const __N) { 19180b57cec5SDimitry Andric return (unsigned short)((__v8hi)__A)[__N & 7]; 19190b57cec5SDimitry Andric } 19200b57cec5SDimitry Andric 192181ad6265SDimitry Andric extern __inline __m128i 192281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi16(__m128i const __A,int const __D,int const __N)192381ad6265SDimitry Andric _mm_insert_epi16(__m128i const __A, int const __D, int const __N) { 192481ad6265SDimitry Andric __v8hi __result = (__v8hi)__A; 19250b57cec5SDimitry Andric 192681ad6265SDimitry Andric __result[(__N & 7)] = __D; 19270b57cec5SDimitry Andric 192881ad6265SDimitry Andric return (__m128i)__result; 19290b57cec5SDimitry Andric } 19300b57cec5SDimitry Andric 193181ad6265SDimitry Andric extern __inline __m128i 193281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi16(__m128i __A,__m128i __B)193381ad6265SDimitry Andric _mm_max_epi16(__m128i __A, __m128i __B) { 19340b57cec5SDimitry Andric return (__m128i)vec_max((__v8hi)__A, (__v8hi)__B); 19350b57cec5SDimitry Andric } 19360b57cec5SDimitry Andric 193781ad6265SDimitry Andric 
extern __inline __m128i 193881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu8(__m128i __A,__m128i __B)193981ad6265SDimitry Andric _mm_max_epu8(__m128i __A, __m128i __B) { 19400b57cec5SDimitry Andric return (__m128i)vec_max((__v16qu)__A, (__v16qu)__B); 19410b57cec5SDimitry Andric } 19420b57cec5SDimitry Andric 194381ad6265SDimitry Andric extern __inline __m128i 194481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi16(__m128i __A,__m128i __B)194581ad6265SDimitry Andric _mm_min_epi16(__m128i __A, __m128i __B) { 19460b57cec5SDimitry Andric return (__m128i)vec_min((__v8hi)__A, (__v8hi)__B); 19470b57cec5SDimitry Andric } 19480b57cec5SDimitry Andric 194981ad6265SDimitry Andric extern __inline __m128i 195081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu8(__m128i __A,__m128i __B)195181ad6265SDimitry Andric _mm_min_epu8(__m128i __A, __m128i __B) { 19520b57cec5SDimitry Andric return (__m128i)vec_min((__v16qu)__A, (__v16qu)__B); 19530b57cec5SDimitry Andric } 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric #ifdef _ARCH_PWR8 19560b57cec5SDimitry Andric /* Intrinsic functions that require PowerISA 2.07 minimum. */ 19570b57cec5SDimitry Andric 195881ad6265SDimitry Andric /* Return a mask created from the most significant bit of each 8-bit 195981ad6265SDimitry Andric element in A. 
*/ 196081ad6265SDimitry Andric extern __inline int 196181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_epi8(__m128i __A)196281ad6265SDimitry Andric _mm_movemask_epi8(__m128i __A) { 196381ad6265SDimitry Andric #ifdef _ARCH_PWR10 196481ad6265SDimitry Andric return vec_extractm((__v16qu)__A); 196581ad6265SDimitry Andric #else 196681ad6265SDimitry Andric __vector unsigned long long __result; 196781ad6265SDimitry Andric static const __vector unsigned char __perm_mask = { 19680b57cec5SDimitry Andric 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 196981ad6265SDimitry Andric 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; 19700b57cec5SDimitry Andric 197181ad6265SDimitry Andric __result = ((__vector unsigned long long)vec_vbpermq( 197281ad6265SDimitry Andric (__vector unsigned char)__A, (__vector unsigned char)__perm_mask)); 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 197581ad6265SDimitry Andric return __result[1]; 19760b57cec5SDimitry Andric #else 197781ad6265SDimitry Andric return __result[0]; 19780b57cec5SDimitry Andric #endif 197981ad6265SDimitry Andric #endif /* !_ARCH_PWR10 */ 19800b57cec5SDimitry Andric } 19810b57cec5SDimitry Andric #endif /* _ARCH_PWR8 */ 19820b57cec5SDimitry Andric 198381ad6265SDimitry Andric extern __inline __m128i 198481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epu16(__m128i __A,__m128i __B)198581ad6265SDimitry Andric _mm_mulhi_epu16(__m128i __A, __m128i __B) { 198681ad6265SDimitry Andric __v4su __w0, __w1; 198781ad6265SDimitry Andric __v16qu __xform1 = { 19880b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 198981ad6265SDimitry Andric 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 199081ad6265SDimitry Andric 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F 19910b57cec5SDimitry Andric #else 199281ad6265SDimitry Andric 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08, 199381ad6265SDimitry Andric 
0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D 19940b57cec5SDimitry Andric #endif 19950b57cec5SDimitry Andric }; 19960b57cec5SDimitry Andric 199781ad6265SDimitry Andric __w0 = vec_vmuleuh((__v8hu)__A, (__v8hu)__B); 199881ad6265SDimitry Andric __w1 = vec_vmulouh((__v8hu)__A, (__v8hu)__B); 199981ad6265SDimitry Andric return (__m128i)vec_perm(__w0, __w1, __xform1); 20000b57cec5SDimitry Andric } 20010b57cec5SDimitry Andric 200281ad6265SDimitry Andric extern __inline __m128i 200381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflehi_epi16(__m128i __A,const int __mask)200481ad6265SDimitry Andric _mm_shufflehi_epi16(__m128i __A, const int __mask) { 200581ad6265SDimitry Andric unsigned long __element_selector_98 = __mask & 0x03; 200681ad6265SDimitry Andric unsigned long __element_selector_BA = (__mask >> 2) & 0x03; 200781ad6265SDimitry Andric unsigned long __element_selector_DC = (__mask >> 4) & 0x03; 200881ad6265SDimitry Andric unsigned long __element_selector_FE = (__mask >> 6) & 0x03; 200981ad6265SDimitry Andric static const unsigned short __permute_selectors[4] = { 20100b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 20110b57cec5SDimitry Andric 0x0908, 0x0B0A, 0x0D0C, 0x0F0E 20120b57cec5SDimitry Andric #else 20130b57cec5SDimitry Andric 0x0809, 0x0A0B, 0x0C0D, 0x0E0F 20140b57cec5SDimitry Andric #endif 20150b57cec5SDimitry Andric }; 201681ad6265SDimitry Andric __v2du __pmask = 20170b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 20180b57cec5SDimitry Andric {0x1716151413121110UL, 0UL}; 20190b57cec5SDimitry Andric #else 20200b57cec5SDimitry Andric {0x1011121314151617UL, 0UL}; 20210b57cec5SDimitry Andric #endif 202281ad6265SDimitry Andric __m64_union __t; 202381ad6265SDimitry Andric __v2du __a, __r; 20240b57cec5SDimitry Andric 202581ad6265SDimitry Andric __t.as_short[0] = __permute_selectors[__element_selector_98]; 202681ad6265SDimitry Andric __t.as_short[1] = __permute_selectors[__element_selector_BA]; 202781ad6265SDimitry Andric 
__t.as_short[2] = __permute_selectors[__element_selector_DC]; 202881ad6265SDimitry Andric __t.as_short[3] = __permute_selectors[__element_selector_FE]; 202981ad6265SDimitry Andric __pmask[1] = __t.as_m64; 203081ad6265SDimitry Andric __a = (__v2du)__A; 203181ad6265SDimitry Andric __r = vec_perm(__a, __a, (__vector unsigned char)__pmask); 203281ad6265SDimitry Andric return (__m128i)__r; 20330b57cec5SDimitry Andric } 20340b57cec5SDimitry Andric 203581ad6265SDimitry Andric extern __inline __m128i 203681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflelo_epi16(__m128i __A,const int __mask)203781ad6265SDimitry Andric _mm_shufflelo_epi16(__m128i __A, const int __mask) { 203881ad6265SDimitry Andric unsigned long __element_selector_10 = __mask & 0x03; 203981ad6265SDimitry Andric unsigned long __element_selector_32 = (__mask >> 2) & 0x03; 204081ad6265SDimitry Andric unsigned long __element_selector_54 = (__mask >> 4) & 0x03; 204181ad6265SDimitry Andric unsigned long __element_selector_76 = (__mask >> 6) & 0x03; 204281ad6265SDimitry Andric static const unsigned short __permute_selectors[4] = { 20430b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 20440b57cec5SDimitry Andric 0x0100, 0x0302, 0x0504, 0x0706 20450b57cec5SDimitry Andric #else 20460b57cec5SDimitry Andric 0x0001, 0x0203, 0x0405, 0x0607 20470b57cec5SDimitry Andric #endif 20480b57cec5SDimitry Andric }; 204981ad6265SDimitry Andric __v2du __pmask = 20500b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 20510b57cec5SDimitry Andric {0UL, 0x1f1e1d1c1b1a1918UL}; 20520b57cec5SDimitry Andric #else 20530b57cec5SDimitry Andric {0UL, 0x18191a1b1c1d1e1fUL}; 20540b57cec5SDimitry Andric #endif 205581ad6265SDimitry Andric __m64_union __t; 205681ad6265SDimitry Andric __v2du __a, __r; 205781ad6265SDimitry Andric __t.as_short[0] = __permute_selectors[__element_selector_10]; 205881ad6265SDimitry Andric __t.as_short[1] = __permute_selectors[__element_selector_32]; 205981ad6265SDimitry 
Andric __t.as_short[2] = __permute_selectors[__element_selector_54]; 206081ad6265SDimitry Andric __t.as_short[3] = __permute_selectors[__element_selector_76]; 206181ad6265SDimitry Andric __pmask[0] = __t.as_m64; 206281ad6265SDimitry Andric __a = (__v2du)__A; 206381ad6265SDimitry Andric __r = vec_perm(__a, __a, (__vector unsigned char)__pmask); 206481ad6265SDimitry Andric return (__m128i)__r; 20650b57cec5SDimitry Andric } 20660b57cec5SDimitry Andric 206781ad6265SDimitry Andric extern __inline __m128i 206881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi32(__m128i __A,const int __mask)206981ad6265SDimitry Andric _mm_shuffle_epi32(__m128i __A, const int __mask) { 207081ad6265SDimitry Andric unsigned long __element_selector_10 = __mask & 0x03; 207181ad6265SDimitry Andric unsigned long __element_selector_32 = (__mask >> 2) & 0x03; 207281ad6265SDimitry Andric unsigned long __element_selector_54 = (__mask >> 4) & 0x03; 207381ad6265SDimitry Andric unsigned long __element_selector_76 = (__mask >> 6) & 0x03; 207481ad6265SDimitry Andric static const unsigned int __permute_selectors[4] = { 20750b57cec5SDimitry Andric #ifdef __LITTLE_ENDIAN__ 20760b57cec5SDimitry Andric 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C 20770b57cec5SDimitry Andric #else 20780b57cec5SDimitry Andric 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F 20790b57cec5SDimitry Andric #endif 20800b57cec5SDimitry Andric }; 208181ad6265SDimitry Andric __v4su __t; 20820b57cec5SDimitry Andric 208381ad6265SDimitry Andric __t[0] = __permute_selectors[__element_selector_10]; 208481ad6265SDimitry Andric __t[1] = __permute_selectors[__element_selector_32]; 208581ad6265SDimitry Andric __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; 208681ad6265SDimitry Andric __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; 208781ad6265SDimitry Andric return (__m128i)vec_perm((__v4si)__A, (__v4si)__A, 208881ad6265SDimitry Andric (__vector unsigned 
char)__t);
}

/* Conditionally store bytes of __A to the unaligned address __C: a byte is
   written only where the corresponding byte of the mask __B has its most
   significant bit set.  Emulated as a 16-byte load, vec_sel merge, and
   16-byte store — so unlike the x86 MASKMOVDQU instruction, the full 16
   bytes at __C are read and rewritten.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maskmoveu_si128(__m128i __A, __m128i __B, char *__C) {
  /* 0x7f in every byte; an unsigned byte of __B compares greater than 0x7f
     exactly when its high (sign) bit is set.  */
  __v2du __hibit = {0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
  __v16qu __mask, __tmp;
  __m128i_u *__p = (__m128i_u *)__C;

  __tmp = (__v16qu)_mm_loadu_si128(__p);
  __mask = (__v16qu)vec_cmpgt((__v16qu)__B, (__v16qu)__hibit);
  __tmp = vec_sel(__tmp, (__v16qu)__A, __mask);
  _mm_storeu_si128(__p, (__m128i)__tmp);
}

/* Element-wise rounded average of 16 unsigned bytes (PAVGB).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_avg_epu8(__m128i __A, __m128i __B) {
  return (__m128i)vec_avg((__v16qu)__A, (__v16qu)__B);
}

/* Element-wise rounded average of 8 unsigned halfwords (PAVGW).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_avg_epu16(__m128i __A, __m128i __B) {
  return (__m128i)vec_avg((__v8hu)__A, (__v8hu)__B);
}

/* Sum of absolute differences (PSADBW): sums |__A[i] - __B[i]| over the 16
   unsigned bytes, accumulating the low and high groups of eight bytes into
   two sums placed in the two 64-bit lanes of the result.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sad_epu8(__m128i __A, __m128i __B) {
  __v16qu __a, __b;
  __v16qu __vabsdiff;
  __v4si __vsum;
  const __v4su __zero = {0, 0, 0, 0};
  __v4si __result;

  __a = (__v16qu)__A;
  __b = (__v16qu)__B;
#ifndef _ARCH_PWR9
  /* No single absolute-difference instruction before POWER9: compute it as
     max - min, which is exact for unsigned operands.  */
  __v16qu __vmin = vec_min(__a, __b);
  __v16qu __vmax = vec_max(__a, __b);
  __vabsdiff = vec_sub(__vmax, __vmin);
#else
  /* POWER9 provides vec_absd (vabsdub) directly.  */
  __vabsdiff = vec_absd(__a, __b);
#endif
  /* Sum four groups of bytes into integers.  */
  __vsum = (__vector signed int)vec_sum4s(__vabsdiff, __zero);
#ifdef __LITTLE_ENDIAN__
  /* Sum across four integers with two integer results.  */
  __asm__("vsum2sws %0,%1,%2" : "=v"(__result) : "v"(__vsum), "v"(__zero));
  /* Note: vec_sum2s could be used here, but on little-endian, vector
     shifts are added that are not needed for this use-case.
     A vector shift to correctly position the 32-bit integer results
     (currently at [0] and [2]) to [1] and [3] would then need to be
     swapped back again since the desired results are two 64-bit
     integers ([1]|[0] and [3]|[2]). Thus, no shift is performed. */
#else
  /* Sum across four integers with two integer results.  */
  __result = vec_sum2s(__vsum, (__vector signed int)__zero);
  /* Rotate the sums into the correct position.  */
  __result = vec_sld(__result, __result, 6);
#endif
  return (__m128i)__result;
}

/* Non-temporal (streaming) store of a 32-bit int: dcbtstt hints that the
   cache block will be stored to but is transient, then performs the store.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si32(int *__A, int __B) {
  /* Use the data cache block touch for store transient.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}

/* Non-temporal store of a 64-bit int; same dcbtstt transient-store hint.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si64(long long int *__A, long long int __B) {
  /* Use the data cache block touch for store transient.  */
  __asm__(" dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}

/* Non-temporal store of a whole 128-bit vector.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si128(__m128i *__A, __m128i __B) {
  /* Use the data cache block touch for store transient.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}

/* Non-temporal store of two doubles to the (16-byte) destination.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_pd(double *__A, __m128d __B) {
  /* Use the data cache block touch for store transient.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *(__m128d *)__A = __B;
}

/* Flush the cache block containing __A (CLFLUSH equivalent via dcbf).  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_clflush(void const *__A) {
  /* Use the data cache block flush.  */
  __asm__("dcbf 0,%0" : : "b"(__A) : "memory");
}

/* Load fence (LFENCE equivalent).  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_lfence(void) {
  /* Use light weight sync for load to load ordering.  */
  __atomic_thread_fence(__ATOMIC_RELEASE);
}

/* Full memory fence (MFENCE equivalent).  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mfence(void) {
  /* Use heavy weight sync for any to any ordering.  */
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
}

/* Move a 32-bit int into the low element; upper three elements are zeroed.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi32_si128(int __A) {
  return _mm_set_epi32(0, 0, 0, __A);
}

/* Move a 64-bit int into the low lane; the upper lane is zeroed.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi64_si128(long long __A) {
  return __extension__(__m128i)(__v2di){__A, 0LL};
}

/* Microsoft intrinsic.  Same as _mm_cvtsi64_si128 above.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi64x_si128(long long __A) {
  return __extension__(__m128i)(__v2di){__A, 0LL};
}

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type — the 128-bit pattern is
   reinterpreted as-is.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castpd_ps(__m128d __A) {
  return (__m128)__A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castpd_si128(__m128d __A) {
  return (__m128i)__A;
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castps_pd(__m128 __A) {
  return (__m128d)__A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castps_si128(__m128 __A) {
  return (__m128i)__A;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castsi128_ps(__m128i __A) {
  return (__m128)__A;
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_castsi128_pd(__m128i __A) {
  return (__m128d)__A;
}

#else
#include_next <emmintrin.h>
#endif /* defined(__powerpc64__) && \
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* EMMINTRIN_H_ */