xref: /freebsd/contrib/llvm-project/clang/lib/Headers/ppc_wrappers/emmintrin.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
10b57cec5SDimitry Andric /*===---- emmintrin.h - Implementation of SSE2 intrinsics on PowerPC -------===
20b57cec5SDimitry Andric  *
30b57cec5SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric  *
70b57cec5SDimitry Andric  *===-----------------------------------------------------------------------===
80b57cec5SDimitry Andric  */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric /* Implemented from the specification included in the Intel C++ Compiler
110b57cec5SDimitry Andric    User Guide and Reference, version 9.0.  */
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef NO_WARN_X86_INTRINSICS
140b57cec5SDimitry Andric /* This header file is to help porting code using Intel intrinsics
150b57cec5SDimitry Andric    explicitly from x86_64 to powerpc64/powerpc64le.
160b57cec5SDimitry Andric 
   Since X86 SSE2 intrinsics mainly handle the __m128i and __m128d types,
   the PowerPC VMX/VSX ISA is a good match for vector double SIMD operations.
190b57cec5SDimitry Andric    However scalar float operations in vector (XMM) registers require
200b57cec5SDimitry Andric    the POWER8 VSX ISA (2.07) level. There are differences for data
210b57cec5SDimitry Andric    format and placement of float scalars in the vector register, which
220b57cec5SDimitry Andric    require extra steps to match SSE2 scalar float semantics on POWER.
230b57cec5SDimitry Andric 
   It should be noted that there are significant differences between
   X86_64's MXCSR and PowerISA's FPSCR/VSCR registers. It is
   recommended to use the portable <fenv.h> interfaces instead of
   accessing the MXCSR directly.
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric    Most SSE2 scalar float intrinsic operations can be performed more
290b57cec5SDimitry Andric    efficiently as C language float scalar operations or optimized to
300b57cec5SDimitry Andric    use vector SIMD operations. We recommend this for new applications.
310b57cec5SDimitry Andric */
3281ad6265SDimitry Andric #error                                                                         \
3381ad6265SDimitry Andric     "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
340b57cec5SDimitry Andric #endif
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric #ifndef EMMINTRIN_H_
370b57cec5SDimitry Andric #define EMMINTRIN_H_
380b57cec5SDimitry Andric 
39bdd1243dSDimitry Andric #if defined(__powerpc64__) &&                                                  \
40fcaf7f86SDimitry Andric     (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
41a7dea167SDimitry Andric 
420b57cec5SDimitry Andric #include <altivec.h>
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric /* We need definitions from the SSE header files.  */
450b57cec5SDimitry Andric #include <xmmintrin.h>
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric /* SSE2 */
/* Internal element-view aliases: the same 16-byte payload reinterpreted
   at different element widths (df=double, di=long long, si=int,
   hi=short, qi=char; trailing 'u' = unsigned).  */
typedef __vector double __v2df;
typedef __vector float __v4f;
typedef __vector long long __v2di;
typedef __vector unsigned long long __v2du;
typedef __vector int __v4si;
typedef __vector unsigned int __v4su;
typedef __vector short __v8hi;
typedef __vector unsigned short __v8hu;
typedef __vector signed char __v16qi;
typedef __vector unsigned char __v16qu;

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));

/* Unaligned version of the same types.  Used by the *_loadu/*_storeu
   intrinsics so the compiler emits unaligned-safe accesses.  */
typedef long long __m128i_u
    __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
typedef double __m128d_u
    __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));

/* Define two value permute mask.  Packs two lane selectors (each 0 or 1)
   into the 2-bit immediate expected by _mm_shuffle_pd.  */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric /* Create a vector with element 0 as F and the rest zero.  */
7481ad6265SDimitry Andric extern __inline __m128d
7581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sd(double __F)7681ad6265SDimitry Andric     _mm_set_sd(double __F) {
770b57cec5SDimitry Andric   return __extension__(__m128d){__F, 0.0};
780b57cec5SDimitry Andric }
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric /* Create a vector with both elements equal to F.  */
8181ad6265SDimitry Andric extern __inline __m128d
8281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pd(double __F)8381ad6265SDimitry Andric     _mm_set1_pd(double __F) {
840b57cec5SDimitry Andric   return __extension__(__m128d){__F, __F};
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric 
8781ad6265SDimitry Andric extern __inline __m128d
8881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd1(double __F)8981ad6265SDimitry Andric     _mm_set_pd1(double __F) {
900b57cec5SDimitry Andric   return _mm_set1_pd(__F);
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric /* Create a vector with the lower value X and upper value W.  */
9481ad6265SDimitry Andric extern __inline __m128d
9581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd(double __W,double __X)9681ad6265SDimitry Andric     _mm_set_pd(double __W, double __X) {
970b57cec5SDimitry Andric   return __extension__(__m128d){__X, __W};
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric /* Create a vector with the lower value W and upper value X.  */
10181ad6265SDimitry Andric extern __inline __m128d
10281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pd(double __W,double __X)10381ad6265SDimitry Andric     _mm_setr_pd(double __W, double __X) {
1040b57cec5SDimitry Andric   return __extension__(__m128d){__W, __X};
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric /* Create an undefined vector.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_undefined_pd(void) {
  /* Deliberate self-initialization: yields an indeterminate value without
     loading anything, matching _mm_undefined_pd's "contents unspecified"
     contract.  NOTE(review): some compilers flag this under -Winit-self /
     -Wuninitialized; the idiom is intentional — do not "fix" it.  */
  __m128d __Y = __Y;
  return __Y;
}
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric /* Create a vector of zeros.  */
11681ad6265SDimitry Andric extern __inline __m128d
11781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd(void)11881ad6265SDimitry Andric     _mm_setzero_pd(void) {
1190b57cec5SDimitry Andric   return (__m128d)vec_splats(0);
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric /* Sets the low DPFP value of A from the low value of B.  */
12381ad6265SDimitry Andric extern __inline __m128d
12481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sd(__m128d __A,__m128d __B)12581ad6265SDimitry Andric     _mm_move_sd(__m128d __A, __m128d __B) {
12681ad6265SDimitry Andric   __v2df __result = (__v2df)__A;
12781ad6265SDimitry Andric   __result[0] = ((__v2df)__B)[0];
12881ad6265SDimitry Andric   return (__m128d)__result;
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_load_pd(double const *__P) {
  /* NOTE(review): vec_ld performs an aligned vector load; a misaligned
     __P is presumably truncated to 16-byte alignment rather than
     faulting — callers must honor the alignment contract.  */
  return ((__m128d)vec_ld(0, (__v16qu *)__P));
}
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric /* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
13981ad6265SDimitry Andric extern __inline __m128d
14081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_pd(double const * __P)14181ad6265SDimitry Andric     _mm_loadu_pd(double const *__P) {
1420b57cec5SDimitry Andric   return (vec_vsx_ld(0, __P));
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric /* Create a vector with all two elements equal to *P.  */
14681ad6265SDimitry Andric extern __inline __m128d
14781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_pd(double const * __P)14881ad6265SDimitry Andric     _mm_load1_pd(double const *__P) {
1490b57cec5SDimitry Andric   return (vec_splats(*__P));
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric /* Create a vector with element 0 as *P and the rest zero.  */
15381ad6265SDimitry Andric extern __inline __m128d
15481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd(double const * __P)15581ad6265SDimitry Andric     _mm_load_sd(double const *__P) {
1560b57cec5SDimitry Andric   return _mm_set_sd(*__P);
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric 
15981ad6265SDimitry Andric extern __inline __m128d
16081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd1(double const * __P)16181ad6265SDimitry Andric     _mm_load_pd1(double const *__P) {
1620b57cec5SDimitry Andric   return _mm_load1_pd(__P);
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric /* Load two DPFP values in reverse order.  The address must be aligned.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loadr_pd(double const *__P) {
  __v2df __tmp = _mm_load_pd(__P);
  /* xxpermdi control 2 swaps the two 64-bit halves, yielding
     {__P[1], __P[0]}.  */
  return (__m128d)vec_xxpermdi(__tmp, __tmp, 2);
}
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric /* Store two DPFP values.  The address must be 16-byte aligned.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_store_pd(double *__P, __m128d __A) {
  /* NOTE(review): vec_st is an aligned vector store; a misaligned __P is
     presumably truncated to 16-byte alignment rather than faulting —
     callers must honor the alignment contract.  */
  vec_st((__v16qu)__A, 0, (__v16qu *)__P);
}
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric /* Store two DPFP values.  The address need not be 16-byte aligned.  */
18181ad6265SDimitry Andric extern __inline void
18281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_pd(double * __P,__m128d __A)18381ad6265SDimitry Andric     _mm_storeu_pd(double *__P, __m128d __A) {
1840b57cec5SDimitry Andric   *(__m128d_u *)__P = __A;
1850b57cec5SDimitry Andric }
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric /* Stores the lower DPFP value.  */
18881ad6265SDimitry Andric extern __inline void
18981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd(double * __P,__m128d __A)19081ad6265SDimitry Andric     _mm_store_sd(double *__P, __m128d __A) {
1910b57cec5SDimitry Andric   *__P = ((__v2df)__A)[0];
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric 
19481ad6265SDimitry Andric extern __inline double
19581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_f64(__m128d __A)19681ad6265SDimitry Andric     _mm_cvtsd_f64(__m128d __A) {
1970b57cec5SDimitry Andric   return ((__v2df)__A)[0];
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric 
20081ad6265SDimitry Andric extern __inline void
20181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd(double * __P,__m128d __A)20281ad6265SDimitry Andric     _mm_storel_pd(double *__P, __m128d __A) {
2030b57cec5SDimitry Andric   _mm_store_sd(__P, __A);
2040b57cec5SDimitry Andric }
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric /* Stores the upper DPFP value.  */
20781ad6265SDimitry Andric extern __inline void
20881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pd(double * __P,__m128d __A)20981ad6265SDimitry Andric     _mm_storeh_pd(double *__P, __m128d __A) {
2100b57cec5SDimitry Andric   *__P = ((__v2df)__A)[1];
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric /* Store the lower DPFP value across two words.
2130b57cec5SDimitry Andric    The address must be 16-byte aligned.  */
21481ad6265SDimitry Andric extern __inline void
21581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_pd(double * __P,__m128d __A)21681ad6265SDimitry Andric     _mm_store1_pd(double *__P, __m128d __A) {
2170b57cec5SDimitry Andric   _mm_store_pd(__P, vec_splat(__A, 0));
2180b57cec5SDimitry Andric }
2190b57cec5SDimitry Andric 
22081ad6265SDimitry Andric extern __inline void
22181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd1(double * __P,__m128d __A)22281ad6265SDimitry Andric     _mm_store_pd1(double *__P, __m128d __A) {
2230b57cec5SDimitry Andric   _mm_store1_pd(__P, __A);
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric /* Store two DPFP values in reverse order.  The address must be aligned.  */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_storer_pd(double *__P, __m128d __A) {
  /* xxpermdi control 2 swaps the two 64-bit halves before the aligned
     store, so memory receives {__A[1], __A[0]}.  */
  _mm_store_pd(__P, vec_xxpermdi(__A, __A, 2));
}
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric /* Intel intrinsic.  */
23481ad6265SDimitry Andric extern __inline long long
23581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64(__m128i __A)23681ad6265SDimitry Andric     _mm_cvtsi128_si64(__m128i __A) {
2370b57cec5SDimitry Andric   return ((__v2di)__A)[0];
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric /* Microsoft intrinsic.  */
24181ad6265SDimitry Andric extern __inline long long
24281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64x(__m128i __A)24381ad6265SDimitry Andric     _mm_cvtsi128_si64x(__m128i __A) {
2440b57cec5SDimitry Andric   return ((__v2di)__A)[0];
2450b57cec5SDimitry Andric }
2460b57cec5SDimitry Andric 
24781ad6265SDimitry Andric extern __inline __m128d
24881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pd(__m128d __A,__m128d __B)24981ad6265SDimitry Andric     _mm_add_pd(__m128d __A, __m128d __B) {
2500b57cec5SDimitry Andric   return (__m128d)((__v2df)__A + (__v2df)__B);
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric /* Add the lower double-precision (64-bit) floating-point element in
2540b57cec5SDimitry Andric    a and b, store the result in the lower element of dst, and copy
2550b57cec5SDimitry Andric    the upper element from a to the upper element of dst. */
25681ad6265SDimitry Andric extern __inline __m128d
25781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sd(__m128d __A,__m128d __B)25881ad6265SDimitry Andric     _mm_add_sd(__m128d __A, __m128d __B) {
2590b57cec5SDimitry Andric   __A[0] = __A[0] + __B[0];
2600b57cec5SDimitry Andric   return (__A);
2610b57cec5SDimitry Andric }
2620b57cec5SDimitry Andric 
26381ad6265SDimitry Andric extern __inline __m128d
26481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pd(__m128d __A,__m128d __B)26581ad6265SDimitry Andric     _mm_sub_pd(__m128d __A, __m128d __B) {
2660b57cec5SDimitry Andric   return (__m128d)((__v2df)__A - (__v2df)__B);
2670b57cec5SDimitry Andric }
2680b57cec5SDimitry Andric 
26981ad6265SDimitry Andric extern __inline __m128d
27081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sd(__m128d __A,__m128d __B)27181ad6265SDimitry Andric     _mm_sub_sd(__m128d __A, __m128d __B) {
2720b57cec5SDimitry Andric   __A[0] = __A[0] - __B[0];
2730b57cec5SDimitry Andric   return (__A);
2740b57cec5SDimitry Andric }
2750b57cec5SDimitry Andric 
27681ad6265SDimitry Andric extern __inline __m128d
27781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_pd(__m128d __A,__m128d __B)27881ad6265SDimitry Andric     _mm_mul_pd(__m128d __A, __m128d __B) {
2790b57cec5SDimitry Andric   return (__m128d)((__v2df)__A * (__v2df)__B);
2800b57cec5SDimitry Andric }
2810b57cec5SDimitry Andric 
28281ad6265SDimitry Andric extern __inline __m128d
28381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sd(__m128d __A,__m128d __B)28481ad6265SDimitry Andric     _mm_mul_sd(__m128d __A, __m128d __B) {
2850b57cec5SDimitry Andric   __A[0] = __A[0] * __B[0];
2860b57cec5SDimitry Andric   return (__A);
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric 
28981ad6265SDimitry Andric extern __inline __m128d
29081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_pd(__m128d __A,__m128d __B)29181ad6265SDimitry Andric     _mm_div_pd(__m128d __A, __m128d __B) {
2920b57cec5SDimitry Andric   return (__m128d)((__v2df)__A / (__v2df)__B);
2930b57cec5SDimitry Andric }
2940b57cec5SDimitry Andric 
29581ad6265SDimitry Andric extern __inline __m128d
29681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sd(__m128d __A,__m128d __B)29781ad6265SDimitry Andric     _mm_div_sd(__m128d __A, __m128d __B) {
2980b57cec5SDimitry Andric   __A[0] = __A[0] / __B[0];
2990b57cec5SDimitry Andric   return (__A);
3000b57cec5SDimitry Andric }
3010b57cec5SDimitry Andric 
30281ad6265SDimitry Andric extern __inline __m128d
30381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_pd(__m128d __A)30481ad6265SDimitry Andric     _mm_sqrt_pd(__m128d __A) {
3050b57cec5SDimitry Andric   return (vec_sqrt(__A));
3060b57cec5SDimitry Andric }
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric /* Return pair {sqrt (B[0]), A[1]}.  */
30981ad6265SDimitry Andric extern __inline __m128d
31081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sd(__m128d __A,__m128d __B)31181ad6265SDimitry Andric     _mm_sqrt_sd(__m128d __A, __m128d __B) {
31281ad6265SDimitry Andric   __v2df __c;
31381ad6265SDimitry Andric   __c = vec_sqrt((__v2df)_mm_set1_pd(__B[0]));
31481ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
3150b57cec5SDimitry Andric }
3160b57cec5SDimitry Andric 
31781ad6265SDimitry Andric extern __inline __m128d
31881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pd(__m128d __A,__m128d __B)31981ad6265SDimitry Andric     _mm_min_pd(__m128d __A, __m128d __B) {
3200b57cec5SDimitry Andric   return (vec_min(__A, __B));
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric 
32381ad6265SDimitry Andric extern __inline __m128d
32481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sd(__m128d __A,__m128d __B)32581ad6265SDimitry Andric     _mm_min_sd(__m128d __A, __m128d __B) {
32681ad6265SDimitry Andric   __v2df __a, __b, __c;
32781ad6265SDimitry Andric   __a = vec_splats(__A[0]);
32881ad6265SDimitry Andric   __b = vec_splats(__B[0]);
32981ad6265SDimitry Andric   __c = vec_min(__a, __b);
33081ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
3310b57cec5SDimitry Andric }
3320b57cec5SDimitry Andric 
33381ad6265SDimitry Andric extern __inline __m128d
33481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pd(__m128d __A,__m128d __B)33581ad6265SDimitry Andric     _mm_max_pd(__m128d __A, __m128d __B) {
3360b57cec5SDimitry Andric   return (vec_max(__A, __B));
3370b57cec5SDimitry Andric }
3380b57cec5SDimitry Andric 
33981ad6265SDimitry Andric extern __inline __m128d
34081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sd(__m128d __A,__m128d __B)34181ad6265SDimitry Andric     _mm_max_sd(__m128d __A, __m128d __B) {
34281ad6265SDimitry Andric   __v2df __a, __b, __c;
34381ad6265SDimitry Andric   __a = vec_splats(__A[0]);
34481ad6265SDimitry Andric   __b = vec_splats(__B[0]);
34581ad6265SDimitry Andric   __c = vec_max(__a, __b);
34681ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric 
34981ad6265SDimitry Andric extern __inline __m128d
35081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pd(__m128d __A,__m128d __B)35181ad6265SDimitry Andric     _mm_cmpeq_pd(__m128d __A, __m128d __B) {
3520b57cec5SDimitry Andric   return ((__m128d)vec_cmpeq((__v2df)__A, (__v2df)__B));
3530b57cec5SDimitry Andric }
3540b57cec5SDimitry Andric 
35581ad6265SDimitry Andric extern __inline __m128d
35681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_pd(__m128d __A,__m128d __B)35781ad6265SDimitry Andric     _mm_cmplt_pd(__m128d __A, __m128d __B) {
3580b57cec5SDimitry Andric   return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B));
3590b57cec5SDimitry Andric }
3600b57cec5SDimitry Andric 
36181ad6265SDimitry Andric extern __inline __m128d
36281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_pd(__m128d __A,__m128d __B)36381ad6265SDimitry Andric     _mm_cmple_pd(__m128d __A, __m128d __B) {
3640b57cec5SDimitry Andric   return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B));
3650b57cec5SDimitry Andric }
3660b57cec5SDimitry Andric 
36781ad6265SDimitry Andric extern __inline __m128d
36881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pd(__m128d __A,__m128d __B)36981ad6265SDimitry Andric     _mm_cmpgt_pd(__m128d __A, __m128d __B) {
3700b57cec5SDimitry Andric   return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B));
3710b57cec5SDimitry Andric }
3720b57cec5SDimitry Andric 
37381ad6265SDimitry Andric extern __inline __m128d
37481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_pd(__m128d __A,__m128d __B)37581ad6265SDimitry Andric     _mm_cmpge_pd(__m128d __A, __m128d __B) {
3760b57cec5SDimitry Andric   return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B));
3770b57cec5SDimitry Andric }
3780b57cec5SDimitry Andric 
37981ad6265SDimitry Andric extern __inline __m128d
38081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_pd(__m128d __A,__m128d __B)38181ad6265SDimitry Andric     _mm_cmpneq_pd(__m128d __A, __m128d __B) {
38281ad6265SDimitry Andric   __v2df __temp = (__v2df)vec_cmpeq((__v2df)__A, (__v2df)__B);
38381ad6265SDimitry Andric   return ((__m128d)vec_nor(__temp, __temp));
3840b57cec5SDimitry Andric }
3850b57cec5SDimitry Andric 
38681ad6265SDimitry Andric extern __inline __m128d
38781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_pd(__m128d __A,__m128d __B)38881ad6265SDimitry Andric     _mm_cmpnlt_pd(__m128d __A, __m128d __B) {
3890b57cec5SDimitry Andric   return ((__m128d)vec_cmpge((__v2df)__A, (__v2df)__B));
3900b57cec5SDimitry Andric }
3910b57cec5SDimitry Andric 
39281ad6265SDimitry Andric extern __inline __m128d
39381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_pd(__m128d __A,__m128d __B)39481ad6265SDimitry Andric     _mm_cmpnle_pd(__m128d __A, __m128d __B) {
3950b57cec5SDimitry Andric   return ((__m128d)vec_cmpgt((__v2df)__A, (__v2df)__B));
3960b57cec5SDimitry Andric }
3970b57cec5SDimitry Andric 
39881ad6265SDimitry Andric extern __inline __m128d
39981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_pd(__m128d __A,__m128d __B)40081ad6265SDimitry Andric     _mm_cmpngt_pd(__m128d __A, __m128d __B) {
4010b57cec5SDimitry Andric   return ((__m128d)vec_cmple((__v2df)__A, (__v2df)__B));
4020b57cec5SDimitry Andric }
4030b57cec5SDimitry Andric 
40481ad6265SDimitry Andric extern __inline __m128d
40581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_pd(__m128d __A,__m128d __B)40681ad6265SDimitry Andric     _mm_cmpnge_pd(__m128d __A, __m128d __B) {
4070b57cec5SDimitry Andric   return ((__m128d)vec_cmplt((__v2df)__A, (__v2df)__B));
4080b57cec5SDimitry Andric }
4090b57cec5SDimitry Andric 
41081ad6265SDimitry Andric extern __inline __m128d
41181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_pd(__m128d __A,__m128d __B)41281ad6265SDimitry Andric     _mm_cmpord_pd(__m128d __A, __m128d __B) {
41381ad6265SDimitry Andric   __v2du __c, __d;
4140b57cec5SDimitry Andric   /* Compare against self will return false (0's) if NAN.  */
41581ad6265SDimitry Andric   __c = (__v2du)vec_cmpeq(__A, __A);
41681ad6265SDimitry Andric   __d = (__v2du)vec_cmpeq(__B, __B);
4170b57cec5SDimitry Andric   /* A != NAN and B != NAN.  */
41881ad6265SDimitry Andric   return ((__m128d)vec_and(__c, __d));
4190b57cec5SDimitry Andric }
4200b57cec5SDimitry Andric 
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpunord_pd(__m128d __A, __m128d __B) {
#if _ARCH_PWR8
  __v2du __c, __d;
  /* Compare against self will return false (0's) if NAN.  */
  __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A);
  __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B);
  /* A == NAN OR B == NAN converts too:
     NOT(A != NAN) OR NOT(B != NAN).  */
  __c = vec_nor(__c, __c);
  /* POWER8 or-with-complement: vec_orc(c, d) == c | ~d, so this yields
     ~(A==A) | ~(B==B) in a single instruction.  */
  return ((__m128d)vec_orc(__c, __d));
#else
  /* Pre-POWER8 fallback: no or-with-complement, so negate both masks
     explicitly before OR-ing.  */
  __v2du __c, __d;
  /* Compare against self will return false (0's) if NAN.  */
  __c = (__v2du)vec_cmpeq((__v2df)__A, (__v2df)__A);
  __d = (__v2du)vec_cmpeq((__v2df)__B, (__v2df)__B);
  /* Convert the true ('1's) is NAN.  */
  __c = vec_nor(__c, __c);
  __d = vec_nor(__d, __d);
  return ((__m128d)vec_or(__c, __d));
#endif
}
4440b57cec5SDimitry Andric 
44581ad6265SDimitry Andric extern __inline __m128d
44681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_sd(__m128d __A,__m128d __B)44781ad6265SDimitry Andric     _mm_cmpeq_sd(__m128d __A, __m128d __B) {
44881ad6265SDimitry Andric   __v2df __a, __b, __c;
4490b57cec5SDimitry Andric   /* PowerISA VSX does not allow partial (for just lower double)
4500b57cec5SDimitry Andric      results. So to insure we don't generate spurious exceptions
4510b57cec5SDimitry Andric      (from the upper double values) we splat the lower double
4520b57cec5SDimitry Andric      before we do the operation. */
45381ad6265SDimitry Andric   __a = vec_splats(__A[0]);
45481ad6265SDimitry Andric   __b = vec_splats(__B[0]);
45581ad6265SDimitry Andric   __c = (__v2df)vec_cmpeq(__a, __b);
4560b57cec5SDimitry Andric   /* Then we merge the lower double result with the original upper
4570b57cec5SDimitry Andric      double from __A.  */
45881ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric 
46181ad6265SDimitry Andric extern __inline __m128d
46281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_sd(__m128d __A,__m128d __B)46381ad6265SDimitry Andric     _mm_cmplt_sd(__m128d __A, __m128d __B) {
46481ad6265SDimitry Andric   __v2df __a, __b, __c;
46581ad6265SDimitry Andric   __a = vec_splats(__A[0]);
46681ad6265SDimitry Andric   __b = vec_splats(__B[0]);
46781ad6265SDimitry Andric   __c = (__v2df)vec_cmplt(__a, __b);
46881ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
4690b57cec5SDimitry Andric }
4700b57cec5SDimitry Andric 
47181ad6265SDimitry Andric extern __inline __m128d
47281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_sd(__m128d __A,__m128d __B)47381ad6265SDimitry Andric     _mm_cmple_sd(__m128d __A, __m128d __B) {
47481ad6265SDimitry Andric   __v2df __a, __b, __c;
47581ad6265SDimitry Andric   __a = vec_splats(__A[0]);
47681ad6265SDimitry Andric   __b = vec_splats(__B[0]);
47781ad6265SDimitry Andric   __c = (__v2df)vec_cmple(__a, __b);
47881ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
4790b57cec5SDimitry Andric }
4800b57cec5SDimitry Andric 
48181ad6265SDimitry Andric extern __inline __m128d
48281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_sd(__m128d __A,__m128d __B)48381ad6265SDimitry Andric     _mm_cmpgt_sd(__m128d __A, __m128d __B) {
48481ad6265SDimitry Andric   __v2df __a, __b, __c;
48581ad6265SDimitry Andric   __a = vec_splats(__A[0]);
48681ad6265SDimitry Andric   __b = vec_splats(__B[0]);
48781ad6265SDimitry Andric   __c = (__v2df)vec_cmpgt(__a, __b);
48881ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric 
49181ad6265SDimitry Andric extern __inline __m128d
49281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_sd(__m128d __A,__m128d __B)49381ad6265SDimitry Andric     _mm_cmpge_sd(__m128d __A, __m128d __B) {
49481ad6265SDimitry Andric   __v2df __a, __b, __c;
49581ad6265SDimitry Andric   __a = vec_splats(__A[0]);
49681ad6265SDimitry Andric   __b = vec_splats(__B[0]);
49781ad6265SDimitry Andric   __c = (__v2df)vec_cmpge(__a, __b);
49881ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric 
50181ad6265SDimitry Andric extern __inline __m128d
50281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_sd(__m128d __A,__m128d __B)50381ad6265SDimitry Andric     _mm_cmpneq_sd(__m128d __A, __m128d __B) {
50481ad6265SDimitry Andric   __v2df __a, __b, __c;
50581ad6265SDimitry Andric   __a = vec_splats(__A[0]);
50681ad6265SDimitry Andric   __b = vec_splats(__B[0]);
50781ad6265SDimitry Andric   __c = (__v2df)vec_cmpeq(__a, __b);
50881ad6265SDimitry Andric   __c = vec_nor(__c, __c);
50981ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
5100b57cec5SDimitry Andric }
5110b57cec5SDimitry Andric 
51281ad6265SDimitry Andric extern __inline __m128d
51381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_sd(__m128d __A,__m128d __B)51481ad6265SDimitry Andric     _mm_cmpnlt_sd(__m128d __A, __m128d __B) {
51581ad6265SDimitry Andric   __v2df __a, __b, __c;
51681ad6265SDimitry Andric   __a = vec_splats(__A[0]);
51781ad6265SDimitry Andric   __b = vec_splats(__B[0]);
5180b57cec5SDimitry Andric   /* Not less than is just greater than or equal.  */
51981ad6265SDimitry Andric   __c = (__v2df)vec_cmpge(__a, __b);
52081ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
5210b57cec5SDimitry Andric }
5220b57cec5SDimitry Andric 
52381ad6265SDimitry Andric extern __inline __m128d
52481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_sd(__m128d __A,__m128d __B)52581ad6265SDimitry Andric     _mm_cmpnle_sd(__m128d __A, __m128d __B) {
52681ad6265SDimitry Andric   __v2df __a, __b, __c;
52781ad6265SDimitry Andric   __a = vec_splats(__A[0]);
52881ad6265SDimitry Andric   __b = vec_splats(__B[0]);
5290b57cec5SDimitry Andric   /* Not less than or equal is just greater than.  */
53081ad6265SDimitry Andric   __c = (__v2df)vec_cmpge(__a, __b);
53181ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
5320b57cec5SDimitry Andric }
5330b57cec5SDimitry Andric 
53481ad6265SDimitry Andric extern __inline __m128d
53581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_sd(__m128d __A,__m128d __B)53681ad6265SDimitry Andric     _mm_cmpngt_sd(__m128d __A, __m128d __B) {
53781ad6265SDimitry Andric   __v2df __a, __b, __c;
53881ad6265SDimitry Andric   __a = vec_splats(__A[0]);
53981ad6265SDimitry Andric   __b = vec_splats(__B[0]);
5400b57cec5SDimitry Andric   /* Not greater than is just less than or equal.  */
54181ad6265SDimitry Andric   __c = (__v2df)vec_cmple(__a, __b);
54281ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
5430b57cec5SDimitry Andric }
5440b57cec5SDimitry Andric 
54581ad6265SDimitry Andric extern __inline __m128d
54681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_sd(__m128d __A,__m128d __B)54781ad6265SDimitry Andric     _mm_cmpnge_sd(__m128d __A, __m128d __B) {
54881ad6265SDimitry Andric   __v2df __a, __b, __c;
54981ad6265SDimitry Andric   __a = vec_splats(__A[0]);
55081ad6265SDimitry Andric   __b = vec_splats(__B[0]);
5510b57cec5SDimitry Andric   /* Not greater than or equal is just less than.  */
55281ad6265SDimitry Andric   __c = (__v2df)vec_cmplt(__a, __b);
55381ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__c[0], __A[1]);
5540b57cec5SDimitry Andric }
5550b57cec5SDimitry Andric 
55681ad6265SDimitry Andric extern __inline __m128d
55781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_sd(__m128d __A,__m128d __B)55881ad6265SDimitry Andric     _mm_cmpord_sd(__m128d __A, __m128d __B) {
55981ad6265SDimitry Andric   __v2df __r;
56081ad6265SDimitry Andric   __r = (__v2df)_mm_cmpord_pd(vec_splats(__A[0]), vec_splats(__B[0]));
56181ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__r[0], ((__v2df)__A)[1]);
5620b57cec5SDimitry Andric }
5630b57cec5SDimitry Andric 
56481ad6265SDimitry Andric extern __inline __m128d
56581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_sd(__m128d __A,__m128d __B)56681ad6265SDimitry Andric     _mm_cmpunord_sd(__m128d __A, __m128d __B) {
56781ad6265SDimitry Andric   __v2df __r;
56881ad6265SDimitry Andric   __r = _mm_cmpunord_pd(vec_splats(__A[0]), vec_splats(__B[0]));
56981ad6265SDimitry Andric   return (__m128d)_mm_setr_pd(__r[0], __A[1]);
5700b57cec5SDimitry Andric }
5710b57cec5SDimitry Andric 
/* FIXME
   The _mm_comi??_sd and _mm_ucomi??_sd implementations below are
   exactly the same because GCC for PowerPC only generates unordered
   compares (scalar and vector).
   Technically _mm_comieq_sd et al. should be using the ordered
   compare and signal for QNaNs.  The _mm_ucomieq_sd et al. should
   be OK.  */
57981ad6265SDimitry Andric extern __inline int
58081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sd(__m128d __A,__m128d __B)58181ad6265SDimitry Andric     _mm_comieq_sd(__m128d __A, __m128d __B) {
5820b57cec5SDimitry Andric   return (__A[0] == __B[0]);
5830b57cec5SDimitry Andric }
5840b57cec5SDimitry Andric 
58581ad6265SDimitry Andric extern __inline int
58681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sd(__m128d __A,__m128d __B)58781ad6265SDimitry Andric     _mm_comilt_sd(__m128d __A, __m128d __B) {
5880b57cec5SDimitry Andric   return (__A[0] < __B[0]);
5890b57cec5SDimitry Andric }
5900b57cec5SDimitry Andric 
59181ad6265SDimitry Andric extern __inline int
59281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sd(__m128d __A,__m128d __B)59381ad6265SDimitry Andric     _mm_comile_sd(__m128d __A, __m128d __B) {
5940b57cec5SDimitry Andric   return (__A[0] <= __B[0]);
5950b57cec5SDimitry Andric }
5960b57cec5SDimitry Andric 
59781ad6265SDimitry Andric extern __inline int
59881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sd(__m128d __A,__m128d __B)59981ad6265SDimitry Andric     _mm_comigt_sd(__m128d __A, __m128d __B) {
6000b57cec5SDimitry Andric   return (__A[0] > __B[0]);
6010b57cec5SDimitry Andric }
6020b57cec5SDimitry Andric 
60381ad6265SDimitry Andric extern __inline int
60481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_sd(__m128d __A,__m128d __B)60581ad6265SDimitry Andric     _mm_comige_sd(__m128d __A, __m128d __B) {
6060b57cec5SDimitry Andric   return (__A[0] >= __B[0]);
6070b57cec5SDimitry Andric }
6080b57cec5SDimitry Andric 
60981ad6265SDimitry Andric extern __inline int
61081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_sd(__m128d __A,__m128d __B)61181ad6265SDimitry Andric     _mm_comineq_sd(__m128d __A, __m128d __B) {
6120b57cec5SDimitry Andric   return (__A[0] != __B[0]);
6130b57cec5SDimitry Andric }
6140b57cec5SDimitry Andric 
61581ad6265SDimitry Andric extern __inline int
61681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_sd(__m128d __A,__m128d __B)61781ad6265SDimitry Andric     _mm_ucomieq_sd(__m128d __A, __m128d __B) {
6180b57cec5SDimitry Andric   return (__A[0] == __B[0]);
6190b57cec5SDimitry Andric }
6200b57cec5SDimitry Andric 
62181ad6265SDimitry Andric extern __inline int
62281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_sd(__m128d __A,__m128d __B)62381ad6265SDimitry Andric     _mm_ucomilt_sd(__m128d __A, __m128d __B) {
6240b57cec5SDimitry Andric   return (__A[0] < __B[0]);
6250b57cec5SDimitry Andric }
6260b57cec5SDimitry Andric 
62781ad6265SDimitry Andric extern __inline int
62881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_sd(__m128d __A,__m128d __B)62981ad6265SDimitry Andric     _mm_ucomile_sd(__m128d __A, __m128d __B) {
6300b57cec5SDimitry Andric   return (__A[0] <= __B[0]);
6310b57cec5SDimitry Andric }
6320b57cec5SDimitry Andric 
63381ad6265SDimitry Andric extern __inline int
63481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_sd(__m128d __A,__m128d __B)63581ad6265SDimitry Andric     _mm_ucomigt_sd(__m128d __A, __m128d __B) {
6360b57cec5SDimitry Andric   return (__A[0] > __B[0]);
6370b57cec5SDimitry Andric }
6380b57cec5SDimitry Andric 
63981ad6265SDimitry Andric extern __inline int
64081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_sd(__m128d __A,__m128d __B)64181ad6265SDimitry Andric     _mm_ucomige_sd(__m128d __A, __m128d __B) {
6420b57cec5SDimitry Andric   return (__A[0] >= __B[0]);
6430b57cec5SDimitry Andric }
6440b57cec5SDimitry Andric 
64581ad6265SDimitry Andric extern __inline int
64681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_sd(__m128d __A,__m128d __B)64781ad6265SDimitry Andric     _mm_ucomineq_sd(__m128d __A, __m128d __B) {
6480b57cec5SDimitry Andric   return (__A[0] != __B[0]);
6490b57cec5SDimitry Andric }
6500b57cec5SDimitry Andric 
6510b57cec5SDimitry Andric /* Create a vector of Qi, where i is the element number.  */
65281ad6265SDimitry Andric extern __inline __m128i
65381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64x(long long __q1,long long __q0)65481ad6265SDimitry Andric     _mm_set_epi64x(long long __q1, long long __q0) {
6550b57cec5SDimitry Andric   return __extension__(__m128i)(__v2di){__q0, __q1};
6560b57cec5SDimitry Andric }
6570b57cec5SDimitry Andric 
65881ad6265SDimitry Andric extern __inline __m128i
65981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64(__m64 __q1,__m64 __q0)66081ad6265SDimitry Andric     _mm_set_epi64(__m64 __q1, __m64 __q0) {
6610b57cec5SDimitry Andric   return _mm_set_epi64x((long long)__q1, (long long)__q0);
6620b57cec5SDimitry Andric }
6630b57cec5SDimitry Andric 
66481ad6265SDimitry Andric extern __inline __m128i
66581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi32(int __q3,int __q2,int __q1,int __q0)66681ad6265SDimitry Andric     _mm_set_epi32(int __q3, int __q2, int __q1, int __q0) {
6670b57cec5SDimitry Andric   return __extension__(__m128i)(__v4si){__q0, __q1, __q2, __q3};
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
67081ad6265SDimitry Andric extern __inline __m128i
67181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi16(short __q7,short __q6,short __q5,short __q4,short __q3,short __q2,short __q1,short __q0)67281ad6265SDimitry Andric     _mm_set_epi16(short __q7, short __q6, short __q5, short __q4, short __q3,
67381ad6265SDimitry Andric                   short __q2, short __q1, short __q0) {
67481ad6265SDimitry Andric   return __extension__(__m128i)(__v8hi){__q0, __q1, __q2, __q3,
67581ad6265SDimitry Andric                                         __q4, __q5, __q6, __q7};
6760b57cec5SDimitry Andric }
6770b57cec5SDimitry Andric 
67881ad6265SDimitry Andric extern __inline __m128i
67981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi8(char __q15,char __q14,char __q13,char __q12,char __q11,char __q10,char __q09,char __q08,char __q07,char __q06,char __q05,char __q04,char __q03,char __q02,char __q01,char __q00)68081ad6265SDimitry Andric     _mm_set_epi8(char __q15, char __q14, char __q13, char __q12, char __q11,
68181ad6265SDimitry Andric                  char __q10, char __q09, char __q08, char __q07, char __q06,
68281ad6265SDimitry Andric                  char __q05, char __q04, char __q03, char __q02, char __q01,
68381ad6265SDimitry Andric                  char __q00) {
6840b57cec5SDimitry Andric   return __extension__(__m128i)(__v16qi){
6850b57cec5SDimitry Andric       __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
68681ad6265SDimitry Andric       __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15};
6870b57cec5SDimitry Andric }
6880b57cec5SDimitry Andric 
6890b57cec5SDimitry Andric /* Set all of the elements of the vector to A.  */
69081ad6265SDimitry Andric extern __inline __m128i
69181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64x(long long __A)69281ad6265SDimitry Andric     _mm_set1_epi64x(long long __A) {
6930b57cec5SDimitry Andric   return _mm_set_epi64x(__A, __A);
6940b57cec5SDimitry Andric }
6950b57cec5SDimitry Andric 
69681ad6265SDimitry Andric extern __inline __m128i
69781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64(__m64 __A)69881ad6265SDimitry Andric     _mm_set1_epi64(__m64 __A) {
6990b57cec5SDimitry Andric   return _mm_set_epi64(__A, __A);
7000b57cec5SDimitry Andric }
7010b57cec5SDimitry Andric 
70281ad6265SDimitry Andric extern __inline __m128i
70381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi32(int __A)70481ad6265SDimitry Andric     _mm_set1_epi32(int __A) {
7050b57cec5SDimitry Andric   return _mm_set_epi32(__A, __A, __A, __A);
7060b57cec5SDimitry Andric }
7070b57cec5SDimitry Andric 
70881ad6265SDimitry Andric extern __inline __m128i
70981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi16(short __A)71081ad6265SDimitry Andric     _mm_set1_epi16(short __A) {
7110b57cec5SDimitry Andric   return _mm_set_epi16(__A, __A, __A, __A, __A, __A, __A, __A);
7120b57cec5SDimitry Andric }
7130b57cec5SDimitry Andric 
71481ad6265SDimitry Andric extern __inline __m128i
71581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi8(char __A)71681ad6265SDimitry Andric     _mm_set1_epi8(char __A) {
71781ad6265SDimitry Andric   return _mm_set_epi8(__A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A,
71881ad6265SDimitry Andric                       __A, __A, __A, __A, __A);
7190b57cec5SDimitry Andric }
7200b57cec5SDimitry Andric 
7210b57cec5SDimitry Andric /* Create a vector of Qi, where i is the element number.
7220b57cec5SDimitry Andric    The parameter order is reversed from the _mm_set_epi* functions.  */
72381ad6265SDimitry Andric extern __inline __m128i
72481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi64(__m64 __q0,__m64 __q1)72581ad6265SDimitry Andric     _mm_setr_epi64(__m64 __q0, __m64 __q1) {
7260b57cec5SDimitry Andric   return _mm_set_epi64(__q1, __q0);
7270b57cec5SDimitry Andric }
7280b57cec5SDimitry Andric 
72981ad6265SDimitry Andric extern __inline __m128i
73081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi32(int __q0,int __q1,int __q2,int __q3)73181ad6265SDimitry Andric     _mm_setr_epi32(int __q0, int __q1, int __q2, int __q3) {
7320b57cec5SDimitry Andric   return _mm_set_epi32(__q3, __q2, __q1, __q0);
7330b57cec5SDimitry Andric }
7340b57cec5SDimitry Andric 
73581ad6265SDimitry Andric extern __inline __m128i
73681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi16(short __q0,short __q1,short __q2,short __q3,short __q4,short __q5,short __q6,short __q7)73781ad6265SDimitry Andric     _mm_setr_epi16(short __q0, short __q1, short __q2, short __q3, short __q4,
73881ad6265SDimitry Andric                    short __q5, short __q6, short __q7) {
7390b57cec5SDimitry Andric   return _mm_set_epi16(__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
7400b57cec5SDimitry Andric }
7410b57cec5SDimitry Andric 
74281ad6265SDimitry Andric extern __inline __m128i
74381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi8(char __q00,char __q01,char __q02,char __q03,char __q04,char __q05,char __q06,char __q07,char __q08,char __q09,char __q10,char __q11,char __q12,char __q13,char __q14,char __q15)74481ad6265SDimitry Andric     _mm_setr_epi8(char __q00, char __q01, char __q02, char __q03, char __q04,
74581ad6265SDimitry Andric                   char __q05, char __q06, char __q07, char __q08, char __q09,
74681ad6265SDimitry Andric                   char __q10, char __q11, char __q12, char __q13, char __q14,
74781ad6265SDimitry Andric                   char __q15) {
7480b57cec5SDimitry Andric   return _mm_set_epi8(__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
7490b57cec5SDimitry Andric                       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
/* Load 128 bits of integer data.  For _mm_loadl_epi64, element 0 is *P and
   the rest is zero.  */
75381ad6265SDimitry Andric extern __inline __m128i
75481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_si128(__m128i const * __P)75581ad6265SDimitry Andric     _mm_load_si128(__m128i const *__P) {
7560b57cec5SDimitry Andric   return *__P;
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric 
75981ad6265SDimitry Andric extern __inline __m128i
76081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si128(__m128i_u const * __P)76181ad6265SDimitry Andric     _mm_loadu_si128(__m128i_u const *__P) {
7620b57cec5SDimitry Andric   return (__m128i)(vec_vsx_ld(0, (signed int const *)__P));
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric 
76581ad6265SDimitry Andric extern __inline __m128i
76681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_epi64(__m128i_u const * __P)76781ad6265SDimitry Andric     _mm_loadl_epi64(__m128i_u const *__P) {
7680b57cec5SDimitry Andric   return _mm_set_epi64((__m64)0LL, *(__m64 *)__P);
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric 
77181ad6265SDimitry Andric extern __inline void
77281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_si128(__m128i * __P,__m128i __B)77381ad6265SDimitry Andric     _mm_store_si128(__m128i *__P, __m128i __B) {
7740b57cec5SDimitry Andric   vec_st((__v16qu)__B, 0, (__v16qu *)__P);
7750b57cec5SDimitry Andric }
7760b57cec5SDimitry Andric 
77781ad6265SDimitry Andric extern __inline void
77881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_si128(__m128i_u * __P,__m128i __B)77981ad6265SDimitry Andric     _mm_storeu_si128(__m128i_u *__P, __m128i __B) {
7800b57cec5SDimitry Andric   *__P = __B;
7810b57cec5SDimitry Andric }
7820b57cec5SDimitry Andric 
78381ad6265SDimitry Andric extern __inline void
78481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_epi64(__m128i_u * __P,__m128i __B)78581ad6265SDimitry Andric     _mm_storel_epi64(__m128i_u *__P, __m128i __B) {
7860b57cec5SDimitry Andric   *(long long *)__P = ((__v2di)__B)[0];
7870b57cec5SDimitry Andric }
7880b57cec5SDimitry Andric 
78981ad6265SDimitry Andric extern __inline __m64
79081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_pi64(__m128i_u __B)79181ad6265SDimitry Andric     _mm_movepi64_pi64(__m128i_u __B) {
7920b57cec5SDimitry Andric   return (__m64)((__v2di)__B)[0];
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric 
79581ad6265SDimitry Andric extern __inline __m128i
79681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movpi64_epi64(__m64 __A)79781ad6265SDimitry Andric     _mm_movpi64_epi64(__m64 __A) {
7980b57cec5SDimitry Andric   return _mm_set_epi64((__m64)0LL, __A);
7990b57cec5SDimitry Andric }
8000b57cec5SDimitry Andric 
80181ad6265SDimitry Andric extern __inline __m128i
80281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64(__m128i __A)80381ad6265SDimitry Andric     _mm_move_epi64(__m128i __A) {
8040b57cec5SDimitry Andric   return _mm_set_epi64((__m64)0LL, (__m64)__A[0]);
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric 
8070b57cec5SDimitry Andric /* Create an undefined vector.  */
/* Return a vector with indeterminate contents.  The self-initialization is a
   deliberate idiom: it marks __Y as "initialized" so the compiler does not
   warn, while leaving the actual value undefined.  Do not "fix" it.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_undefined_si128(void) {
  __m128i __Y = __Y;
  return __Y;
}
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric /* Create a vector of zeros.  */
81681ad6265SDimitry Andric extern __inline __m128i
81781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128(void)81881ad6265SDimitry Andric     _mm_setzero_si128(void) {
8190b57cec5SDimitry Andric   return __extension__(__m128i)(__v4si){0, 0, 0, 0};
8200b57cec5SDimitry Andric }
8210b57cec5SDimitry Andric 
#ifdef _ARCH_PWR8
/* Convert the low two 32-bit ints of __A to doubles.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_pd(__m128i __A) {
  __v2di __val;
  /* For LE need to generate Vector Unpack Low Signed Word.
     Which is generated from unpackh.  */
  __val = (__v2di)vec_unpackh((__v4si)__A);

  /* NOTE(review): the (__m128d) cast assumes vec_ctf on __v2di yields the
     doubleword-converted doubles in place — confirm against <altivec.h>.  */
  return (__m128d)vec_ctf(__val, 0);
}
#endif
8340b57cec5SDimitry Andric 
83581ad6265SDimitry Andric extern __inline __m128
83681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ps(__m128i __A)83781ad6265SDimitry Andric     _mm_cvtepi32_ps(__m128i __A) {
8380b57cec5SDimitry Andric   return ((__m128)vec_ctf((__v4si)__A, 0));
8390b57cec5SDimitry Andric }
8400b57cec5SDimitry Andric 
/* Convert the two doubles of __A to 32-bit ints in the low two elements of
   the result (high two elements zero).  vec_rint rounds to an integral value
   using the current rounding mode before the truncating convert.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpd_epi32(__m128d __A) {
  __v2df __rounded = vec_rint(__A);
  __v4si __result, __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  /* VSX Vector truncate Double-Precision to integer and Convert to
   Signed Integer Word format with Saturate.  */
  __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__rounded) :);

#ifdef _ARCH_PWR8
  /* The converted words sit in alternating word elements (hence the
     endian-specific merge); pack against zeros so they end up in the low
     doubleword of the result.  */
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  __result = (__v4si)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    /* Pre-POWER8 fallback: the same gather/pack expressed as one vec_perm.  */
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif
  return (__m128i)__result;
}
8690b57cec5SDimitry Andric 
87081ad6265SDimitry Andric extern __inline __m64
87181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_pi32(__m128d __A)87281ad6265SDimitry Andric     _mm_cvtpd_pi32(__m128d __A) {
87381ad6265SDimitry Andric   __m128i __result = _mm_cvtpd_epi32(__A);
8740b57cec5SDimitry Andric 
87581ad6265SDimitry Andric   return (__m64)__result[0];
8760b57cec5SDimitry Andric }
8770b57cec5SDimitry Andric 
/* Convert the two doubles of __A to floats in the low two elements of the
   result (high two elements zero).  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpd_ps(__m128d __A) {
  __v4sf __result;
  __v4si __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  /* VSX Vector round and Convert Double-Precision to Single-Precision.  */
  __asm__("xvcvdpsp %x0,%x1" : "=wa"(__temp) : "wa"(__A) :);

#ifdef _ARCH_PWR8
  /* The converted singles sit in alternating word elements (hence the
     endian-specific merge); pack against zeros into the low doubleword.  */
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  __result = (__v4sf)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    /* Pre-POWER8 fallback: the same gather/pack expressed as one vec_perm.  */
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4sf)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif
  return ((__m128)__result);
}
9040b57cec5SDimitry Andric 
/* Convert the two doubles of __A to 32-bit ints with truncation (no
   pre-rounding, unlike _mm_cvtpd_epi32); results land in the low two
   elements, high two elements are zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttpd_epi32(__m128d __A) {
  __v4si __result;
  __v4si __temp;
  const __v4si __vzero = {0, 0, 0, 0};

  /* VSX Vector truncate Double-Precision to integer and Convert to
   Signed Integer Word format with Saturate.  */
  __asm__("xvcvdpsxws %x0,%x1" : "=wa"(__temp) : "wa"(__A) :);

#ifdef _ARCH_PWR8
  /* The converted words sit in alternating word elements (hence the
     endian-specific merge); pack against zeros into the low doubleword.  */
#ifdef __LITTLE_ENDIAN__
  __temp = vec_mergeo(__temp, __temp);
#else
  __temp = vec_mergee(__temp, __temp);
#endif
  __result = (__v4si)vec_vpkudum((__vector long long)__temp,
                                 (__vector long long)__vzero);
#else
  {
    /* Pre-POWER8 fallback: the same gather/pack expressed as one vec_perm.  */
    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                              0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
    __result = (__v4si)vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
  }
#endif

  return ((__m128i)__result);
}
9340b57cec5SDimitry Andric 
93581ad6265SDimitry Andric extern __inline __m64
93681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_pi32(__m128d __A)93781ad6265SDimitry Andric     _mm_cvttpd_pi32(__m128d __A) {
93881ad6265SDimitry Andric   __m128i __result = _mm_cvttpd_epi32(__A);
9390b57cec5SDimitry Andric 
94081ad6265SDimitry Andric   return (__m64)__result[0];
9410b57cec5SDimitry Andric }
9420b57cec5SDimitry Andric 
94381ad6265SDimitry Andric extern __inline int
94481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si32(__m128i __A)94581ad6265SDimitry Andric     _mm_cvtsi128_si32(__m128i __A) {
9460b57cec5SDimitry Andric   return ((__v4si)__A)[0];
9470b57cec5SDimitry Andric }
9480b57cec5SDimitry Andric 
#ifdef _ARCH_PWR8
/* Convert the two 32-bit ints packed in the __m64 __A to two doubles.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtpi32_pd(__m64 __A) {
  __v4si __temp;
  __v2di __tmp2;
  /* NOTE(review): __result is declared __v4f (float vector) even though the
     conversion produces doubles — this appears to match a vec_ctf overload
     whose declared return type is vector float; the final (__m128d) cast
     reinterprets the bits back.  Confirm against <altivec.h>'s vec_ctf for
     vector signed long long before changing.  */
  __v4f __result;

  /* Splat the 64-bit __A, sign-extend its two words to doublewords, then
     convert those doublewords to floating point.  */
  __temp = (__v4si)vec_splats(__A);
  __tmp2 = (__v2di)vec_unpackl(__temp);
  __result = vec_ctf((__vector signed long long)__tmp2, 0);
  return (__m128d)__result;
}
#endif
9630b57cec5SDimitry Andric 
96481ad6265SDimitry Andric extern __inline __m128i
96581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32(__m128 __A)96681ad6265SDimitry Andric     _mm_cvtps_epi32(__m128 __A) {
96781ad6265SDimitry Andric   __v4sf __rounded;
96881ad6265SDimitry Andric   __v4si __result;
9690b57cec5SDimitry Andric 
97081ad6265SDimitry Andric   __rounded = vec_rint((__v4sf)__A);
97181ad6265SDimitry Andric   __result = vec_cts(__rounded, 0);
97281ad6265SDimitry Andric   return (__m128i)__result;
9730b57cec5SDimitry Andric }
9740b57cec5SDimitry Andric 
97581ad6265SDimitry Andric extern __inline __m128i
97681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32(__m128 __A)97781ad6265SDimitry Andric     _mm_cvttps_epi32(__m128 __A) {
97881ad6265SDimitry Andric   __v4si __result;
9790b57cec5SDimitry Andric 
98081ad6265SDimitry Andric   __result = vec_cts((__v4sf)__A, 0);
98181ad6265SDimitry Andric   return (__m128i)__result;
9820b57cec5SDimitry Andric }
9830b57cec5SDimitry Andric 
/* Convert the low two floats of __A to doubles.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtps_pd(__m128 __A) {
  /* Check if vec_doubleh is defined by <altivec.h>. If so use that. */
#ifdef vec_doubleh
  return (__m128d)vec_doubleh((__v4sf)__A);
#else
  /* Otherwise the compiler is not current and so need to generate the
     equivalent code.  */
  __v4sf __a = (__v4sf)__A;
  __v4sf __temp;
  __v2df __result;
#ifdef __LITTLE_ENDIAN__
  /* The input float values are in elements {[0], [1]} but the convert
     instruction needs them in elements {[1], [3]}, So we use two
     shift left double vector word immediates to get the elements
     lined up.  */
  __temp = __builtin_vsx_xxsldwi(__a, __a, 3);
  __temp = __builtin_vsx_xxsldwi(__a, __temp, 2);
#else
  /* The input float values are in elements {[0], [1]} but the convert
     instruction needs them in elements {[0], [2]}, So we use two
     shift left double vector word immediates to get the elements
     lined up.  */
  __temp = vec_vmrghw(__a, __a);
#endif
  /* VSX Vector Convert Single-Precision to Double-Precision.  */
  __asm__(" xvcvspdp %x0,%x1" : "=wa"(__result) : "wa"(__temp) :);
  return (__m128d)__result;
#endif
}
10140b57cec5SDimitry Andric 
101581ad6265SDimitry Andric extern __inline int
101681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32(__m128d __A)101781ad6265SDimitry Andric     _mm_cvtsd_si32(__m128d __A) {
101881ad6265SDimitry Andric   __v2df __rounded = vec_rint((__v2df)__A);
101981ad6265SDimitry Andric   int __result = ((__v2df)__rounded)[0];
10200b57cec5SDimitry Andric 
102181ad6265SDimitry Andric   return __result;
10220b57cec5SDimitry Andric }
10230b57cec5SDimitry Andric /* Intel intrinsic.  */
102481ad6265SDimitry Andric extern __inline long long
102581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64(__m128d __A)102681ad6265SDimitry Andric     _mm_cvtsd_si64(__m128d __A) {
102781ad6265SDimitry Andric   __v2df __rounded = vec_rint((__v2df)__A);
102881ad6265SDimitry Andric   long long __result = ((__v2df)__rounded)[0];
10290b57cec5SDimitry Andric 
103081ad6265SDimitry Andric   return __result;
10310b57cec5SDimitry Andric }
10320b57cec5SDimitry Andric 
10330b57cec5SDimitry Andric /* Microsoft intrinsic.  */
103481ad6265SDimitry Andric extern __inline long long
103581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x(__m128d __A)103681ad6265SDimitry Andric     _mm_cvtsd_si64x(__m128d __A) {
10370b57cec5SDimitry Andric   return _mm_cvtsd_si64((__v2df)__A);
10380b57cec5SDimitry Andric }
10390b57cec5SDimitry Andric 
104081ad6265SDimitry Andric extern __inline int
104181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32(__m128d __A)104281ad6265SDimitry Andric     _mm_cvttsd_si32(__m128d __A) {
104381ad6265SDimitry Andric   int __result = ((__v2df)__A)[0];
10440b57cec5SDimitry Andric 
104581ad6265SDimitry Andric   return __result;
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric /* Intel intrinsic.  */
104981ad6265SDimitry Andric extern __inline long long
105081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64(__m128d __A)105181ad6265SDimitry Andric     _mm_cvttsd_si64(__m128d __A) {
105281ad6265SDimitry Andric   long long __result = ((__v2df)__A)[0];
10530b57cec5SDimitry Andric 
105481ad6265SDimitry Andric   return __result;
10550b57cec5SDimitry Andric }
10560b57cec5SDimitry Andric 
10570b57cec5SDimitry Andric /* Microsoft intrinsic.  */
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvttsd_si64x(__m128d __A) {
  /* Microsoft spelling; same operation as _mm_cvttsd_si64.  */
  return _mm_cvttsd_si64(__A);
}
10630b57cec5SDimitry Andric 
/* Convert the lower DPFP value of __B to SPFP and insert it as element
   [0] of __A; the upper three SPFP elements of __A pass through.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsd_ss(__m128 __A, __m128d __B) {
  __v4sf __result = (__v4sf)__A;

#ifdef __LITTLE_ENDIAN__
  __v4sf __temp_s;
  /* Copy double element[0] to element [1] for conversion.  */
  __v2df __temp_b = vec_splat((__v2df)__B, 0);

  /* Pre-rotate __A left 3 (logically right 1) elements.  */
  __result = __builtin_vsx_xxsldwi(__result, __result, 3);
  /* Convert double to single float scalar in a vector.  */
  __asm__("xscvdpsp %x0,%x1" : "=wa"(__temp_s) : "wa"(__temp_b) :);
  /* Shift the resulting scalar into vector element [0].  */
  __result = __builtin_vsx_xxsldwi(__result, __temp_s, 1);
#else
  /* Big endian: element [0] is directly addressable.  */
  __result[0] = ((__v2df)__B)[0];
#endif
  return (__m128)__result;
}
10850b57cec5SDimitry Andric 
108681ad6265SDimitry Andric extern __inline __m128d
108781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd(__m128d __A,int __B)108881ad6265SDimitry Andric     _mm_cvtsi32_sd(__m128d __A, int __B) {
108981ad6265SDimitry Andric   __v2df __result = (__v2df)__A;
109081ad6265SDimitry Andric   double __db = __B;
109181ad6265SDimitry Andric   __result[0] = __db;
109281ad6265SDimitry Andric   return (__m128d)__result;
10930b57cec5SDimitry Andric }
10940b57cec5SDimitry Andric 
10950b57cec5SDimitry Andric /* Intel intrinsic.  */
109681ad6265SDimitry Andric extern __inline __m128d
109781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd(__m128d __A,long long __B)109881ad6265SDimitry Andric     _mm_cvtsi64_sd(__m128d __A, long long __B) {
109981ad6265SDimitry Andric   __v2df __result = (__v2df)__A;
110081ad6265SDimitry Andric   double __db = __B;
110181ad6265SDimitry Andric   __result[0] = __db;
110281ad6265SDimitry Andric   return (__m128d)__result;
11030b57cec5SDimitry Andric }
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric /* Microsoft intrinsic.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtsi64x_sd(__m128d __A, long long __B) {
  /* Microsoft spelling; same operation as _mm_cvtsi64_sd.  */
  return _mm_cvtsi64_sd(__A, __B);
}
11110b57cec5SDimitry Andric 
/* Convert SPFP element [0] of __B to DPFP and insert it as element [0]
   of __A; element [1] of __A passes through unchanged.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtss_sd(__m128d __A, __m128 __B) {
#ifdef __LITTLE_ENDIAN__
  /* Use splat to move element [0] into position for the convert. */
  __v4sf __temp = vec_splat((__v4sf)__B, 0);
  __v2df __res;
  /* Convert single float scalar to double in a vector.  */
  __asm__("xscvspdp %x0,%x1" : "=wa"(__res) : "wa"(__temp) :);
  /* Merge the converted scalar with the preserved upper double of __A.  */
  return (__m128d)vec_mergel(__res, (__v2df)__A);
#else
  /* Big endian: element [0] is directly addressable.  */
  __v2df __res = (__v2df)__A;
  __res[0] = ((__v4sf)__B)[0];
  return (__m128d)__res;
#endif
}
11280b57cec5SDimitry Andric 
/* Select one double from __A (low) and one from __B (high) according to
   the two low bits of __mask.  __mask must effectively be a constant for
   good code generation.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) {
  __vector double __result;
  /* Only the two low mask bits select elements; higher bits are ignored.  */
  const int __litmsk = __mask & 0x3;

  if (__litmsk == 0)
    __result = vec_mergeh(__A, __B);
#if __GNUC__ < 6
  /* GCC before 6 used the opposite operand order for vec_xxpermdi on
     little endian, so swap the operands there.  */
  else if (__litmsk == 1)
    __result = vec_xxpermdi(__B, __A, 2);
  else if (__litmsk == 2)
    __result = vec_xxpermdi(__B, __A, 1);
#else
  else if (__litmsk == 1)
    __result = vec_xxpermdi(__A, __B, 2);
  else if (__litmsk == 2)
    __result = vec_xxpermdi(__A, __B, 1);
#endif
  else
    /* __litmsk == 3: take the high element of both inputs.  */
    __result = vec_mergel(__A, __B);

  return __result;
}
11530b57cec5SDimitry Andric 
/* Select and interleave the upper DPFP values from __A and __B.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_pd(__m128d __A, __m128d __B) {
  return (__m128d)vec_mergel((__v2df)__A, (__v2df)__B);
}

/* Select and interleave the lower DPFP values from __A and __B.  */
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_pd(__m128d __A, __m128d __B) {
  return (__m128d)vec_mergeh((__v2df)__A, (__v2df)__B);
}
11650b57cec5SDimitry Andric 
116681ad6265SDimitry Andric extern __inline __m128d
116781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd(__m128d __A,double const * __B)116881ad6265SDimitry Andric     _mm_loadh_pd(__m128d __A, double const *__B) {
116981ad6265SDimitry Andric   __v2df __result = (__v2df)__A;
117081ad6265SDimitry Andric   __result[1] = *__B;
117181ad6265SDimitry Andric   return (__m128d)__result;
11720b57cec5SDimitry Andric }
11730b57cec5SDimitry Andric 
117481ad6265SDimitry Andric extern __inline __m128d
117581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd(__m128d __A,double const * __B)117681ad6265SDimitry Andric     _mm_loadl_pd(__m128d __A, double const *__B) {
117781ad6265SDimitry Andric   __v2df __result = (__v2df)__A;
117881ad6265SDimitry Andric   __result[0] = *__B;
117981ad6265SDimitry Andric   return (__m128d)__result;
11800b57cec5SDimitry Andric }
11810b57cec5SDimitry Andric 
#ifdef _ARCH_PWR8
/* Intrinsic functions that require PowerISA 2.07 minimum.  */

/* Creates a 2-bit mask from the most significant bits of the DPFP values.  */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movemask_pd(__m128d __A) {
#ifdef _ARCH_PWR10
  /* ISA 3.1 has a direct move-mask instruction.  */
  return vec_extractm((__v2du)__A);
#else
  __vector unsigned long long __result;
  /* Bit-permute selector: 0x80 entries select a zero bit; the 0x40/0x00
     entries pick out the sign bit of each doubleword.  */
  static const __vector unsigned int __perm_mask = {
#ifdef __LITTLE_ENDIAN__
      0x80800040, 0x80808080, 0x80808080, 0x80808080
#else
      0x80808080, 0x80808080, 0x80808080, 0x80804000
#endif
  };

  __result = ((__vector unsigned long long)vec_vbpermq(
      (__vector unsigned char)__A, (__vector unsigned char)__perm_mask));

  /* vbpermq leaves the gathered bits in one doubleword; which one is
     endian dependent.  */
#ifdef __LITTLE_ENDIAN__
  return __result[1];
#else
  return __result[0];
#endif
#endif /* !_ARCH_PWR10 */
}
#endif /* _ARCH_PWR8 */
12120b57cec5SDimitry Andric 
/* Pack 16-bit elements of __A and __B into 8-bit elements with signed
   saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packs_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_packs((__v8hi)__A, (__v8hi)__B);
}

/* Pack 32-bit elements of __A and __B into 16-bit elements with signed
   saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packs_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_packs((__v4si)__A, (__v4si)__B);
}

/* Pack signed 16-bit elements of __A and __B into 8-bit elements with
   unsigned saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_packsu((__v8hi)__A, (__v8hi)__B);
}
12300b57cec5SDimitry Andric 
/* Interleave the bytes from the high halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v16qu)__A, (__v16qu)__B);
}

/* Interleave the 16-bit elements from the high halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v8hu)__A, (__v8hu)__B);
}

/* Interleave the 32-bit elements from the high halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__v4su)__A, (__v4su)__B);
}

/* Interleave the 64-bit elements from the high halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpackhi_epi64(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergel((__vector long long)__A, (__vector long long)__B);
}

/* Interleave the bytes from the low halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v16qu)__A, (__v16qu)__B);
}

/* Interleave the 16-bit elements from the low halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v8hi)__A, (__v8hi)__B);
}

/* Interleave the 32-bit elements from the low halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi32(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__v4si)__A, (__v4si)__B);
}

/* Interleave the 64-bit elements from the low halves of __A and __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_unpacklo_epi64(__m128i __A, __m128i __B) {
  return (__m128i)vec_mergeh((__vector long long)__A, (__vector long long)__B);
}
12780b57cec5SDimitry Andric 
/* Element-wise modular addition of 8-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi8(__m128i __A, __m128i __B) {
  return (__m128i)((__v16qu)__A + (__v16qu)__B);
}

/* Element-wise modular addition of 16-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi16(__m128i __A, __m128i __B) {
  return (__m128i)((__v8hu)__A + (__v8hu)__B);
}

/* Element-wise modular addition of 32-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi32(__m128i __A, __m128i __B) {
  return (__m128i)((__v4su)__A + (__v4su)__B);
}

/* Element-wise modular addition of 64-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_add_epi64(__m128i __A, __m128i __B) {
  return (__m128i)((__v2du)__A + (__v2du)__B);
}
13020b57cec5SDimitry Andric 
/* Element-wise addition of signed 8-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v16qi)__A, (__v16qi)__B);
}

/* Element-wise addition of signed 16-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise addition of unsigned 8-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epu8(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v16qu)__A, (__v16qu)__B);
}

/* Element-wise addition of unsigned 16-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_adds_epu16(__m128i __A, __m128i __B) {
  return (__m128i)vec_adds((__v8hu)__A, (__v8hu)__B);
}
13260b57cec5SDimitry Andric 
/* Element-wise modular subtraction of 8-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi8(__m128i __A, __m128i __B) {
  return (__m128i)((__v16qu)__A - (__v16qu)__B);
}

/* Element-wise modular subtraction of 16-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi16(__m128i __A, __m128i __B) {
  return (__m128i)((__v8hu)__A - (__v8hu)__B);
}

/* Element-wise modular subtraction of 32-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi32(__m128i __A, __m128i __B) {
  return (__m128i)((__v4su)__A - (__v4su)__B);
}

/* Element-wise modular subtraction of 64-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sub_epi64(__m128i __A, __m128i __B) {
  return (__m128i)((__v2du)__A - (__v2du)__B);
}
13500b57cec5SDimitry Andric 
/* Element-wise subtraction of signed 8-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epi8(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v16qi)__A, (__v16qi)__B);
}

/* Element-wise subtraction of signed 16-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epi16(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise subtraction of unsigned 8-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epu8(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v16qu)__A, (__v16qu)__B);
}

/* Element-wise subtraction of unsigned 16-bit elements with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_subs_epu16(__m128i __A, __m128i __B) {
  return (__m128i)vec_subs((__v8hu)__A, (__v8hu)__B);
}
13740b57cec5SDimitry Andric 
137581ad6265SDimitry Andric extern __inline __m128i
137681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16(__m128i __A,__m128i __B)137781ad6265SDimitry Andric     _mm_madd_epi16(__m128i __A, __m128i __B) {
137881ad6265SDimitry Andric   __vector signed int __zero = {0, 0, 0, 0};
13790b57cec5SDimitry Andric 
138081ad6265SDimitry Andric   return (__m128i)vec_vmsumshm((__v8hi)__A, (__v8hi)__B, __zero);
13810b57cec5SDimitry Andric }
13820b57cec5SDimitry Andric 
/* Multiply signed 16-bit elements of __A and __B and keep the high 16
   bits of each 32-bit product.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhi_epi16(__m128i __A, __m128i __B) {
  __vector signed int __w0, __w1;

  /* Permute selector that gathers the high halves of the even products
     (from __w0) and odd products (from __w1) back into element order.  */
  __vector unsigned char __xform1 = {
#ifdef __LITTLE_ENDIAN__
      0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
      0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
#else
      0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08,
      0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
#endif
  };

  /* Full 32-bit products of the even and odd 16-bit element pairs.  */
  __w0 = vec_vmulesh((__v8hi)__A, (__v8hi)__B);
  __w1 = vec_vmulosh((__v8hi)__A, (__v8hi)__B);
  return (__m128i)vec_perm(__w0, __w1, __xform1);
}
14020b57cec5SDimitry Andric 
140381ad6265SDimitry Andric extern __inline __m128i
140481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16(__m128i __A,__m128i __B)140581ad6265SDimitry Andric     _mm_mullo_epi16(__m128i __A, __m128i __B) {
14060b57cec5SDimitry Andric   return (__m128i)((__v8hi)__A * (__v8hi)__B);
14070b57cec5SDimitry Andric }
14080b57cec5SDimitry Andric 
140981ad6265SDimitry Andric extern __inline __m64
141081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32(__m64 __A,__m64 __B)141181ad6265SDimitry Andric     _mm_mul_su32(__m64 __A, __m64 __B) {
141281ad6265SDimitry Andric   unsigned int __a = __A;
141381ad6265SDimitry Andric   unsigned int __b = __B;
14140b57cec5SDimitry Andric 
141581ad6265SDimitry Andric   return ((__m64)__a * (__m64)__b);
14160b57cec5SDimitry Andric }
14170b57cec5SDimitry Andric 
#ifdef _ARCH_PWR8
/* Multiply the unsigned 32-bit elements [0] and [2] of __A and __B,
   producing two unsigned 64-bit products.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epu32(__m128i __A, __m128i __B) {
#if __GNUC__ < 8
  /* Older compilers lack the endian-corrected vec_mule for words; issue
     the raw VMX instruction that picks the right element pair.  */
  __v2du __result;

#ifdef __LITTLE_ENDIAN__
  /* VMX Vector Multiply Odd Unsigned Word.  */
  __asm__("vmulouw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :);
#else
  /* VMX Vector Multiply Even Unsigned Word.  */
  __asm__("vmuleuw %0,%1,%2" : "=v"(__result) : "v"(__A), "v"(__B) :);
#endif
  return (__m128i)__result;
#else
  return (__m128i)vec_mule((__v4su)__A, (__v4su)__B);
#endif
}
#endif
14380b57cec5SDimitry Andric 
/* Shift each 16-bit element of __A left by __B bits, inserting zeros.
   Counts outside [0, 15] produce all-zero elements, matching SSE2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi16(__m128i __A, int __B) {
  __v8hu __lshift;
  /* Default result is zero; only overwritten for in-range counts.  */
  __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};

  if (__B >= 0 && __B < 16) {
    /* vec_splat_s16 takes a 5-bit immediate, so it can only be used when
       the count is a compile-time constant.  */
    if (__builtin_constant_p(__B))
      __lshift = (__v8hu)vec_splat_s16(__B);
    else
      __lshift = vec_splats((unsigned short)__B);

    __result = vec_sl((__v8hi)__A, __lshift);
  }

  return (__m128i)__result;
}
14560b57cec5SDimitry Andric 
/* Shift each 32-bit element of __A left by __B bits, inserting zeros.
   Counts outside [0, 31] produce all-zero elements, matching SSE2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi32(__m128i __A, int __B) {
  __v4su __lshift;
  /* Default result is zero; only overwritten for in-range counts.  */
  __v4si __result = {0, 0, 0, 0};

  if (__B >= 0 && __B < 32) {
    /* vec_splat_s32's immediate is limited to -16..15, hence the extra
       __B < 16 constraint on the constant path.  */
    if (__builtin_constant_p(__B) && __B < 16)
      __lshift = (__v4su)vec_splat_s32(__B);
    else
      __lshift = vec_splats((unsigned int)__B);

    __result = vec_sl((__v4si)__A, __lshift);
  }

  return (__m128i)__result;
}
14740b57cec5SDimitry Andric 
#ifdef _ARCH_PWR8
/* Shift each 64-bit element of __A left by __B bits, inserting zeros.
   Counts outside [0, 63] produce all-zero elements, matching SSE2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_epi64(__m128i __A, int __B) {
  __v2du __lshift;
  /* Default result is zero; only overwritten for in-range counts.  */
  __v2di __result = {0, 0};

  if (__B >= 0 && __B < 64) {
    /* vec_splat_s32's immediate is limited to -16..15, hence the extra
       __B < 16 constraint on the constant path.  Only the low 6 bits of
       each doubleword count are used by vec_sl.  */
    if (__builtin_constant_p(__B) && __B < 16)
      __lshift = (__v2du)vec_splat_s32(__B);
    else
      __lshift = (__v2du)vec_splats((unsigned int)__B);

    __result = vec_sl((__v2di)__A, __lshift);
  }

  return (__m128i)__result;
}
#endif
14940b57cec5SDimitry Andric 
/* Arithmetic right shift of each 16-bit element of __A by __B bits.
   Counts >= 16 saturate to 15, which fills each element with its sign
   bit, matching SSE2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srai_epi16(__m128i __A, int __B) {
  /* Default count of 15 is used whenever __B is out of range.  */
  __v8hu __rshift = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hi __result;

  if (__B < 16) {
    /* vec_splat_s16 needs a compile-time-constant immediate.  */
    if (__builtin_constant_p(__B))
      __rshift = (__v8hu)vec_splat_s16(__B);
    else
      __rshift = vec_splats((unsigned short)__B);
  }
  __result = vec_sra((__v8hi)__A, __rshift);

  return (__m128i)__result;
}
15110b57cec5SDimitry Andric 
/* Arithmetic right shift of each 32-bit element of __A by __B bits.
   Counts >= 32 saturate to 31, which fills each element with its sign
   bit, matching SSE2.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srai_epi32(__m128i __A, int __B) {
  /* Default count of 31 is used whenever __B is out of range.  */
  __v4su __rshift = {31, 31, 31, 31};
  __v4si __result;

  if (__B < 32) {
    if (__builtin_constant_p(__B)) {
      /* vec_splat_s32's immediate only covers -16..15.  */
      if (__B < 16)
        __rshift = (__v4su)vec_splat_s32(__B);
      else
        __rshift = (__v4su)vec_splats((unsigned int)__B);
    } else
      __rshift = vec_splats((unsigned int)__B);
  }
  __result = vec_sra((__v4si)__A, __rshift);

  return (__m128i)__result;
}
15310b57cec5SDimitry Andric 
/* Shift the whole 128-bit vector __A left by __N bytes, shifting in
   zeros; a count of 16 or more produces all zeros (SSE2 semantics).
   NOTE(review): vec_sld takes an immediate shift, so __N must resolve
   to a compile-time constant after inlining.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_bslli_si128(__m128i __A, const int __N) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (__N < 16)
    /* Concatenate __A with zeros and take a 16-byte window __N bytes in.  */
    __result = vec_sld((__v16qu)__A, __zeros, __N);
  else
    __result = __zeros;

  return (__m128i)__result;
}
15450b57cec5SDimitry Andric 
/* Shift the whole 128-bit vector __A right by __N bytes, shifting in
   zeros; a count of 16 or more produces all zeros (SSE2 semantics).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_bsrli_si128(__m128i __A, const int __N) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (__N < 16)
#ifdef __LITTLE_ENDIAN__
    if (__builtin_constant_p(__N))
      /* Would like to use Vector Shift Left Double by Octet
         Immediate here to use the immediate form and avoid
         load of __N * 8 value into a separate VR.  */
      __result = vec_sld(__zeros, (__v16qu)__A, (16 - __N));
    else
#endif
    {
      /* vec_sro/vec_slo take the shift in bits (bytes * 8) from the
         splatted control vector.  */
      __v16qu __shift = vec_splats((unsigned char)(__N * 8));
#ifdef __LITTLE_ENDIAN__
      /* On LE a logical right-by-bytes shift maps to shift-right-by-octet.  */
      __result = vec_sro((__v16qu)__A, __shift);
#else
    /* On BE the byte order is reversed, so use shift-left-by-octet.  */
    __result = vec_slo((__v16qu)__A, __shift);
#endif
    }
  else
    __result = __zeros;

  return (__m128i)__result;
}
15740b57cec5SDimitry Andric 
157581ad6265SDimitry Andric extern __inline __m128i
157681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128(__m128i __A,const int __N)157781ad6265SDimitry Andric     _mm_srli_si128(__m128i __A, const int __N) {
15780b57cec5SDimitry Andric   return _mm_bsrli_si128(__A, __N);
15790b57cec5SDimitry Andric }
15800b57cec5SDimitry Andric 
/* Shift the whole 128-bit vector __A left by _imm5 bytes, shifting in
   zeros; a count of 16 or more produces all zeros (SSE2 semantics).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_slli_si128(__m128i __A, const int _imm5) {
  __v16qu __result;
  const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  if (_imm5 < 16)
#ifdef __LITTLE_ENDIAN__
    /* LE: window starts _imm5 bytes into (__A ++ zeros).  */
    __result = vec_sld((__v16qu)__A, __zeros, _imm5);
#else
    /* BE: byte order reversed, so take the mirrored window.  */
    __result = vec_sld(__zeros, (__v16qu)__A, (16 - _imm5));
#endif
  else
    __result = __zeros;

  return (__m128i)__result;
}
15980b57cec5SDimitry Andric 
/* Logical right shift of each 16-bit element of __A by __B bits.
   SSE2 semantics: a count >= 16 zeroes every element, hence the
   all-zero default result.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))

    _mm_srli_epi16(__m128i __A, int __B) {
  __v8hu __rshift;
  __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};

  if (__B < 16) {
    if (__builtin_constant_p(__B))
      /* Immediate splat avoids a register load for constant counts.  */
      __rshift = (__v8hu)vec_splat_s16(__B);
    else
      __rshift = vec_splats((unsigned short)__B);

    __result = vec_sr((__v8hi)__A, __rshift);
  }

  return (__m128i)__result;
}
16170b57cec5SDimitry Andric 
/* Logical right shift of each 32-bit element of __A by __B bits.
   SSE2 semantics: a count >= 32 zeroes every element.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_epi32(__m128i __A, int __B) {
  __v4su __rshift;
  __v4si __result = {0, 0, 0, 0};

  if (__B < 32) {
    if (__builtin_constant_p(__B)) {
      /* vec_splat_s32's immediate range is limited, so only small
         constants use it.  */
      if (__B < 16)
        __rshift = (__v4su)vec_splat_s32(__B);
      else
        __rshift = (__v4su)vec_splats((unsigned int)__B);
    } else
      __rshift = vec_splats((unsigned int)__B);

    __result = vec_sr((__v4si)__A, __rshift);
  }

  return (__m128i)__result;
}
16380b57cec5SDimitry Andric 
16390b57cec5SDimitry Andric #ifdef _ARCH_PWR8
/* Logical right shift of each 64-bit element of __A by __B bits.
   SSE2 semantics: a count >= 64 zeroes every element.  Requires the
   POWER8 doubleword shift (guarded by _ARCH_PWR8 around this block).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srli_epi64(__m128i __A, int __B) {
  __v2du __rshift;
  __v2di __result = {0, 0};

  if (__B < 64) {
    if (__builtin_constant_p(__B)) {
      if (__B < 16)
        /* Splat a 32-bit immediate; vec_sr only consumes the low
           6 bits of each doubleword, so the duplicated words are
           harmless.  */
        __rshift = (__v2du)vec_splat_s32(__B);
      else
        __rshift = (__v2du)vec_splats((unsigned long long)__B);
    } else
      __rshift = (__v2du)vec_splats((unsigned int)__B);

    __result = vec_sr((__v2di)__A, __rshift);
  }

  return (__m128i)__result;
}
16600b57cec5SDimitry Andric #endif
16610b57cec5SDimitry Andric 
/* Left shift of each 16-bit element of __A by the scalar count held in
   the low 64 bits of __B.  SSE2 semantics: a count > 15 zeroes the
   result.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sll_epi16(__m128i __A, __m128i __B) {
  __v8hu __lshift;
  __vector __bool short __shmask;
  const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hu __result;

  /* Broadcast the halfword of __B that holds the (truncated) count;
     its element index differs with endianness.  */
#ifdef __LITTLE_ENDIAN__
  __lshift = vec_splat((__v8hu)__B, 0);
#else
  __lshift = vec_splat((__v8hu)__B, 3);
#endif
  /* Mask is all-ones where the count is valid (<= 15), all-zeros
     otherwise.  */
  __shmask = vec_cmple(__lshift, __shmax);
  __result = vec_sl((__v8hu)__A, __lshift);
  /* Where the mask is clear the mask itself is zero, so vec_sel yields
     0 there — matching the SSE2 out-of-range behavior.  */
  __result = vec_sel((__v8hu)__shmask, __result, __shmask);

  return (__m128i)__result;
}
16810b57cec5SDimitry Andric 
/* Left shift of each 32-bit element of __A by the scalar count held in
   the low 64 bits of __B.  SSE2 semantics: a count > 31 zeroes the
   result.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sll_epi32(__m128i __A, __m128i __B) {
  __v4su __lshift;
  __vector __bool int __shmask;
  const __v4su __shmax = {32, 32, 32, 32};
  __v4su __result;
  /* Broadcast the word of __B holding the count (endian-dependent).  */
#ifdef __LITTLE_ENDIAN__
  __lshift = vec_splat((__v4su)__B, 0);
#else
  __lshift = vec_splat((__v4su)__B, 1);
#endif
  /* Valid counts are strictly below 32.  */
  __shmask = vec_cmplt(__lshift, __shmax);
  __result = vec_sl((__v4su)__A, __lshift);
  /* Out-of-range lanes select from the all-zero mask, giving 0.  */
  __result = vec_sel((__v4su)__shmask, __result, __shmask);

  return (__m128i)__result;
}
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric #ifdef _ARCH_PWR8
/* Left shift of each 64-bit element of __A by the scalar count in the
   low doubleword of __B; counts > 63 yield zero (SSE2 semantics).
   POWER8-only (enclosing _ARCH_PWR8 guard).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sll_epi64(__m128i __A, __m128i __B) {
  __v2du __lshift;
  __vector __bool long long __shmask;
  const __v2du __shmax = {64, 64};
  __v2du __result;

  __lshift = vec_splat((__v2du)__B, 0);
  __shmask = vec_cmplt(__lshift, __shmax);
  __result = vec_sl((__v2du)__A, __lshift);
  /* Out-of-range lanes select from the all-zero mask, giving 0.  */
  __result = vec_sel((__v2du)__shmask, __result, __shmask);

  return (__m128i)__result;
}
17170b57cec5SDimitry Andric #endif
17180b57cec5SDimitry Andric 
/* Arithmetic right shift of each 16-bit element of __A by the scalar
   count in the low 64 bits of __B.  SSE2 semantics: counts > 15 behave
   like a shift by 15 (all sign bits), hence the clamp via vec_min.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sra_epi16(__m128i __A, __m128i __B) {
  const __v8hu __rshmax = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hu __rshift;
  __v8hi __result;

  /* Broadcast the halfword of __B that holds the count.  */
#ifdef __LITTLE_ENDIAN__
  __rshift = vec_splat((__v8hu)__B, 0);
#else
  __rshift = vec_splat((__v8hu)__B, 3);
#endif
  /* Clamp to 15 so oversized counts saturate to the sign fill.  */
  __rshift = vec_min(__rshift, __rshmax);
  __result = vec_sra((__v8hi)__A, __rshift);

  return (__m128i)__result;
}
17360b57cec5SDimitry Andric 
/* Arithmetic right shift of each 32-bit element of __A by the scalar
   count in the low 64 bits of __B.  SSE2 semantics: counts > 31 behave
   like a shift by 31, hence the clamp via vec_min.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sra_epi32(__m128i __A, __m128i __B) {
  const __v4su __rshmax = {31, 31, 31, 31};
  __v4su __rshift;
  __v4si __result;

  /* Broadcast the word of __B that holds the count.  */
#ifdef __LITTLE_ENDIAN__
  __rshift = vec_splat((__v4su)__B, 0);
#else
  __rshift = vec_splat((__v4su)__B, 1);
#endif
  /* Clamp to 31 so oversized counts saturate to the sign fill.  */
  __rshift = vec_min(__rshift, __rshmax);
  __result = vec_sra((__v4si)__A, __rshift);

  return (__m128i)__result;
}
17540b57cec5SDimitry Andric 
/* Logical right shift of each 16-bit element of __A by the scalar count
   in the low 64 bits of __B; counts > 15 yield zero (SSE2 semantics).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srl_epi16(__m128i __A, __m128i __B) {
  __v8hu __rshift;
  __vector __bool short __shmask;
  const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15};
  __v8hu __result;

  /* Broadcast the halfword of __B that holds the count.  */
#ifdef __LITTLE_ENDIAN__
  __rshift = vec_splat((__v8hu)__B, 0);
#else
  __rshift = vec_splat((__v8hu)__B, 3);
#endif
  /* Mask is set only where the count is valid (<= 15).  */
  __shmask = vec_cmple(__rshift, __shmax);
  __result = vec_sr((__v8hu)__A, __rshift);
  /* Out-of-range lanes select from the all-zero mask, giving 0.  */
  __result = vec_sel((__v8hu)__shmask, __result, __shmask);

  return (__m128i)__result;
}
17740b57cec5SDimitry Andric 
/* Logical right shift of each 32-bit element of __A by the scalar count
   in the low 64 bits of __B; counts > 31 yield zero (SSE2 semantics).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srl_epi32(__m128i __A, __m128i __B) {
  __v4su __rshift;
  __vector __bool int __shmask;
  const __v4su __shmax = {32, 32, 32, 32};
  __v4su __result;

  /* Broadcast the word of __B that holds the count.  */
#ifdef __LITTLE_ENDIAN__
  __rshift = vec_splat((__v4su)__B, 0);
#else
  __rshift = vec_splat((__v4su)__B, 1);
#endif
  /* Mask is set only where the count is valid (< 32).  */
  __shmask = vec_cmplt(__rshift, __shmax);
  __result = vec_sr((__v4su)__A, __rshift);
  /* Out-of-range lanes select from the all-zero mask, giving 0.  */
  __result = vec_sel((__v4su)__shmask, __result, __shmask);

  return (__m128i)__result;
}
17940b57cec5SDimitry Andric 
17950b57cec5SDimitry Andric #ifdef _ARCH_PWR8
/* Logical right shift of each 64-bit element of __A by the scalar count
   in the low doubleword of __B; counts > 63 yield zero (SSE2
   semantics).  POWER8-only (enclosing _ARCH_PWR8 guard).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_srl_epi64(__m128i __A, __m128i __B) {
  __v2du __rshift;
  __vector __bool long long __shmask;
  const __v2du __shmax = {64, 64};
  __v2du __result;

  __rshift = vec_splat((__v2du)__B, 0);
  __shmask = vec_cmplt(__rshift, __shmax);
  __result = vec_sr((__v2du)__A, __rshift);
  /* Out-of-range lanes select from the all-zero mask, giving 0.  */
  __result = vec_sel((__v2du)__shmask, __result, __shmask);

  return (__m128i)__result;
}
18110b57cec5SDimitry Andric #endif
18120b57cec5SDimitry Andric 
181381ad6265SDimitry Andric extern __inline __m128d
181481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_pd(__m128d __A,__m128d __B)181581ad6265SDimitry Andric     _mm_and_pd(__m128d __A, __m128d __B) {
18160b57cec5SDimitry Andric   return (vec_and((__v2df)__A, (__v2df)__B));
18170b57cec5SDimitry Andric }
18180b57cec5SDimitry Andric 
181981ad6265SDimitry Andric extern __inline __m128d
182081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_pd(__m128d __A,__m128d __B)182181ad6265SDimitry Andric     _mm_andnot_pd(__m128d __A, __m128d __B) {
18220b57cec5SDimitry Andric   return (vec_andc((__v2df)__B, (__v2df)__A));
18230b57cec5SDimitry Andric }
18240b57cec5SDimitry Andric 
182581ad6265SDimitry Andric extern __inline __m128d
182681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_pd(__m128d __A,__m128d __B)182781ad6265SDimitry Andric     _mm_or_pd(__m128d __A, __m128d __B) {
18280b57cec5SDimitry Andric   return (vec_or((__v2df)__A, (__v2df)__B));
18290b57cec5SDimitry Andric }
18300b57cec5SDimitry Andric 
183181ad6265SDimitry Andric extern __inline __m128d
183281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_pd(__m128d __A,__m128d __B)183381ad6265SDimitry Andric     _mm_xor_pd(__m128d __A, __m128d __B) {
18340b57cec5SDimitry Andric   return (vec_xor((__v2df)__A, (__v2df)__B));
18350b57cec5SDimitry Andric }
18360b57cec5SDimitry Andric 
183781ad6265SDimitry Andric extern __inline __m128i
183881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128(__m128i __A,__m128i __B)183981ad6265SDimitry Andric     _mm_and_si128(__m128i __A, __m128i __B) {
18400b57cec5SDimitry Andric   return (__m128i)vec_and((__v2di)__A, (__v2di)__B);
18410b57cec5SDimitry Andric }
18420b57cec5SDimitry Andric 
184381ad6265SDimitry Andric extern __inline __m128i
184481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128(__m128i __A,__m128i __B)184581ad6265SDimitry Andric     _mm_andnot_si128(__m128i __A, __m128i __B) {
18460b57cec5SDimitry Andric   return (__m128i)vec_andc((__v2di)__B, (__v2di)__A);
18470b57cec5SDimitry Andric }
18480b57cec5SDimitry Andric 
184981ad6265SDimitry Andric extern __inline __m128i
185081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128(__m128i __A,__m128i __B)185181ad6265SDimitry Andric     _mm_or_si128(__m128i __A, __m128i __B) {
18520b57cec5SDimitry Andric   return (__m128i)vec_or((__v2di)__A, (__v2di)__B);
18530b57cec5SDimitry Andric }
18540b57cec5SDimitry Andric 
185581ad6265SDimitry Andric extern __inline __m128i
185681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128(__m128i __A,__m128i __B)185781ad6265SDimitry Andric     _mm_xor_si128(__m128i __A, __m128i __B) {
18580b57cec5SDimitry Andric   return (__m128i)vec_xor((__v2di)__A, (__v2di)__B);
18590b57cec5SDimitry Andric }
18600b57cec5SDimitry Andric 
186181ad6265SDimitry Andric extern __inline __m128i
186281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8(__m128i __A,__m128i __B)186381ad6265SDimitry Andric     _mm_cmpeq_epi8(__m128i __A, __m128i __B) {
18640b57cec5SDimitry Andric   return (__m128i)vec_cmpeq((__v16qi)__A, (__v16qi)__B);
18650b57cec5SDimitry Andric }
18660b57cec5SDimitry Andric 
186781ad6265SDimitry Andric extern __inline __m128i
186881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16(__m128i __A,__m128i __B)186981ad6265SDimitry Andric     _mm_cmpeq_epi16(__m128i __A, __m128i __B) {
18700b57cec5SDimitry Andric   return (__m128i)vec_cmpeq((__v8hi)__A, (__v8hi)__B);
18710b57cec5SDimitry Andric }
18720b57cec5SDimitry Andric 
187381ad6265SDimitry Andric extern __inline __m128i
187481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32(__m128i __A,__m128i __B)187581ad6265SDimitry Andric     _mm_cmpeq_epi32(__m128i __A, __m128i __B) {
18760b57cec5SDimitry Andric   return (__m128i)vec_cmpeq((__v4si)__A, (__v4si)__B);
18770b57cec5SDimitry Andric }
18780b57cec5SDimitry Andric 
187981ad6265SDimitry Andric extern __inline __m128i
188081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8(__m128i __A,__m128i __B)188181ad6265SDimitry Andric     _mm_cmplt_epi8(__m128i __A, __m128i __B) {
18820b57cec5SDimitry Andric   return (__m128i)vec_cmplt((__v16qi)__A, (__v16qi)__B);
18830b57cec5SDimitry Andric }
18840b57cec5SDimitry Andric 
188581ad6265SDimitry Andric extern __inline __m128i
188681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16(__m128i __A,__m128i __B)188781ad6265SDimitry Andric     _mm_cmplt_epi16(__m128i __A, __m128i __B) {
18880b57cec5SDimitry Andric   return (__m128i)vec_cmplt((__v8hi)__A, (__v8hi)__B);
18890b57cec5SDimitry Andric }
18900b57cec5SDimitry Andric 
189181ad6265SDimitry Andric extern __inline __m128i
189281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32(__m128i __A,__m128i __B)189381ad6265SDimitry Andric     _mm_cmplt_epi32(__m128i __A, __m128i __B) {
18940b57cec5SDimitry Andric   return (__m128i)vec_cmplt((__v4si)__A, (__v4si)__B);
18950b57cec5SDimitry Andric }
18960b57cec5SDimitry Andric 
189781ad6265SDimitry Andric extern __inline __m128i
189881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8(__m128i __A,__m128i __B)189981ad6265SDimitry Andric     _mm_cmpgt_epi8(__m128i __A, __m128i __B) {
19000b57cec5SDimitry Andric   return (__m128i)vec_cmpgt((__v16qi)__A, (__v16qi)__B);
19010b57cec5SDimitry Andric }
19020b57cec5SDimitry Andric 
190381ad6265SDimitry Andric extern __inline __m128i
190481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16(__m128i __A,__m128i __B)190581ad6265SDimitry Andric     _mm_cmpgt_epi16(__m128i __A, __m128i __B) {
19060b57cec5SDimitry Andric   return (__m128i)vec_cmpgt((__v8hi)__A, (__v8hi)__B);
19070b57cec5SDimitry Andric }
19080b57cec5SDimitry Andric 
190981ad6265SDimitry Andric extern __inline __m128i
191081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32(__m128i __A,__m128i __B)191181ad6265SDimitry Andric     _mm_cmpgt_epi32(__m128i __A, __m128i __B) {
19120b57cec5SDimitry Andric   return (__m128i)vec_cmpgt((__v4si)__A, (__v4si)__B);
19130b57cec5SDimitry Andric }
19140b57cec5SDimitry Andric 
191581ad6265SDimitry Andric extern __inline int
191681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16(__m128i const __A,int const __N)191781ad6265SDimitry Andric     _mm_extract_epi16(__m128i const __A, int const __N) {
19180b57cec5SDimitry Andric   return (unsigned short)((__v8hi)__A)[__N & 7];
19190b57cec5SDimitry Andric }
19200b57cec5SDimitry Andric 
192181ad6265SDimitry Andric extern __inline __m128i
192281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16(__m128i const __A,int const __D,int const __N)192381ad6265SDimitry Andric     _mm_insert_epi16(__m128i const __A, int const __D, int const __N) {
192481ad6265SDimitry Andric   __v8hi __result = (__v8hi)__A;
19250b57cec5SDimitry Andric 
192681ad6265SDimitry Andric   __result[(__N & 7)] = __D;
19270b57cec5SDimitry Andric 
192881ad6265SDimitry Andric   return (__m128i)__result;
19290b57cec5SDimitry Andric }
19300b57cec5SDimitry Andric 
193181ad6265SDimitry Andric extern __inline __m128i
193281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi16(__m128i __A,__m128i __B)193381ad6265SDimitry Andric     _mm_max_epi16(__m128i __A, __m128i __B) {
19340b57cec5SDimitry Andric   return (__m128i)vec_max((__v8hi)__A, (__v8hi)__B);
19350b57cec5SDimitry Andric }
19360b57cec5SDimitry Andric 
193781ad6265SDimitry Andric extern __inline __m128i
193881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu8(__m128i __A,__m128i __B)193981ad6265SDimitry Andric     _mm_max_epu8(__m128i __A, __m128i __B) {
19400b57cec5SDimitry Andric   return (__m128i)vec_max((__v16qu)__A, (__v16qu)__B);
19410b57cec5SDimitry Andric }
19420b57cec5SDimitry Andric 
194381ad6265SDimitry Andric extern __inline __m128i
194481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi16(__m128i __A,__m128i __B)194581ad6265SDimitry Andric     _mm_min_epi16(__m128i __A, __m128i __B) {
19460b57cec5SDimitry Andric   return (__m128i)vec_min((__v8hi)__A, (__v8hi)__B);
19470b57cec5SDimitry Andric }
19480b57cec5SDimitry Andric 
194981ad6265SDimitry Andric extern __inline __m128i
195081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu8(__m128i __A,__m128i __B)195181ad6265SDimitry Andric     _mm_min_epu8(__m128i __A, __m128i __B) {
19520b57cec5SDimitry Andric   return (__m128i)vec_min((__v16qu)__A, (__v16qu)__B);
19530b57cec5SDimitry Andric }
19540b57cec5SDimitry Andric 
19550b57cec5SDimitry Andric #ifdef _ARCH_PWR8
19560b57cec5SDimitry Andric /* Intrinsic functions that require PowerISA 2.07 minimum.  */
19570b57cec5SDimitry Andric 
/* Return a 16-bit mask created from the most significant bit of each
   8-bit element in A (bit i of the result = sign bit of byte i).  */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movemask_epi8(__m128i __A) {
#ifdef _ARCH_PWR10
  /* POWER10 has a direct extract-mask instruction.  */
  return vec_extractm((__v16qu)__A);
#else
  __vector unsigned long long __result;
  /* Bit indices (within the 128-bit register) of each byte's sign bit,
     ordered so vbpermq gathers them into the expected SSE bit order.  */
  static const __vector unsigned char __perm_mask = {
      0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
      0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00};

  /* vbpermq collects the 16 selected bits into one doubleword.  */
  __result = ((__vector unsigned long long)vec_vbpermq(
      (__vector unsigned char)__A, (__vector unsigned char)__perm_mask));

  /* The gathered bits land in a different doubleword depending on
     endianness.  */
#ifdef __LITTLE_ENDIAN__
  return __result[1];
#else
  return __result[0];
#endif
#endif /* !_ARCH_PWR10 */
}
19810b57cec5SDimitry Andric #endif /* _ARCH_PWR8 */
19820b57cec5SDimitry Andric 
/* Unsigned 16-bit multiply returning the high 16 bits of each 32-bit
   product.  Computes full products of the even and odd halfword pairs,
   then permutes the high halves back into element order.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhi_epu16(__m128i __A, __m128i __B) {
  __v4su __w0, __w1;
  /* Byte-permute pattern selecting the high halfword of each 32-bit
     product from the even (__w0) and odd (__w1) result vectors; the
     byte offsets differ with endianness.  */
  __v16qu __xform1 = {
#ifdef __LITTLE_ENDIAN__
      0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
      0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
#else
      0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08,
      0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
#endif
  };

  __w0 = vec_vmuleuh((__v8hu)__A, (__v8hu)__B); /* even-index products */
  __w1 = vec_vmulouh((__v8hu)__A, (__v8hu)__B); /* odd-index products */
  return (__m128i)vec_perm(__w0, __w1, __xform1);
}
20010b57cec5SDimitry Andric 
/* Shuffle the upper four 16-bit elements of __A according to the four
   2-bit selectors packed in __mask; the lower four elements pass
   through unchanged.  Implemented by building a vec_perm byte-permute
   control vector at run time.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shufflehi_epi16(__m128i __A, const int __mask) {
  /* Unpack the four 2-bit element selectors from the immediate.  */
  unsigned long __element_selector_98 = __mask & 0x03;
  unsigned long __element_selector_BA = (__mask >> 2) & 0x03;
  unsigned long __element_selector_DC = (__mask >> 4) & 0x03;
  unsigned long __element_selector_FE = (__mask >> 6) & 0x03;
  /* Byte-pair offsets of the four source halfwords in the high half
     of the register (endian-dependent byte order within each pair).  */
  static const unsigned short __permute_selectors[4] = {
#ifdef __LITTLE_ENDIAN__
      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
#else
      0x0809, 0x0A0B, 0x0C0D, 0x0E0F
#endif
  };
  /* Identity permute for the low 8 bytes; the high doubleword is
     filled in below from the selectors.  */
  __v2du __pmask =
#ifdef __LITTLE_ENDIAN__
      {0x1716151413121110UL, 0UL};
#else
      {0x1011121314151617UL, 0UL};
#endif
  __m64_union __t;
  __v2du __a, __r;

  /* Assemble the permute control for the four shuffled halfwords.  */
  __t.as_short[0] = __permute_selectors[__element_selector_98];
  __t.as_short[1] = __permute_selectors[__element_selector_BA];
  __t.as_short[2] = __permute_selectors[__element_selector_DC];
  __t.as_short[3] = __permute_selectors[__element_selector_FE];
  __pmask[1] = __t.as_m64;
  __a = (__v2du)__A;
  __r = vec_perm(__a, __a, (__vector unsigned char)__pmask);
  return (__m128i)__r;
}
20340b57cec5SDimitry Andric 
/* Shuffle the lower four 16-bit elements of __A according to the four
   2-bit selectors packed in __mask; the upper four elements pass
   through unchanged.  Mirror of _mm_shufflehi_epi16 for the low half.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shufflelo_epi16(__m128i __A, const int __mask) {
  /* Unpack the four 2-bit element selectors from the immediate.  */
  unsigned long __element_selector_10 = __mask & 0x03;
  unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
  unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
  unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
  /* Byte-pair offsets of the four source halfwords in the low half of
     the register (endian-dependent byte order within each pair).  */
  static const unsigned short __permute_selectors[4] = {
#ifdef __LITTLE_ENDIAN__
      0x0100, 0x0302, 0x0504, 0x0706
#else
      0x0001, 0x0203, 0x0405, 0x0607
#endif
  };
  /* Identity permute for the high 8 bytes; the low doubleword is
     filled in below from the selectors.  */
  __v2du __pmask =
#ifdef __LITTLE_ENDIAN__
      {0UL, 0x1f1e1d1c1b1a1918UL};
#else
      {0UL, 0x18191a1b1c1d1e1fUL};
#endif
  __m64_union __t;
  __v2du __a, __r;
  /* Assemble the permute control for the four shuffled halfwords.  */
  __t.as_short[0] = __permute_selectors[__element_selector_10];
  __t.as_short[1] = __permute_selectors[__element_selector_32];
  __t.as_short[2] = __permute_selectors[__element_selector_54];
  __t.as_short[3] = __permute_selectors[__element_selector_76];
  __pmask[0] = __t.as_m64;
  __a = (__v2du)__A;
  __r = vec_perm(__a, __a, (__vector unsigned char)__pmask);
  return (__m128i)__r;
}
20660b57cec5SDimitry Andric 
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi32(__m128i __A, const int __mask) {
  /* Select each of the four 32-bit words of the result from __A per the
     four 2-bit selector fields of __mask, as one vec_perm.  */
  unsigned long __element_selector_10 = __mask & 0x03;
  unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
  unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
  unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
  /* Four consecutive byte offsets for each 32-bit word, pre-swizzled per
     endianness.  */
  static const unsigned int __permute_selectors[4] = {
#ifdef __LITTLE_ENDIAN__
      0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
#else
      0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
#endif
  };
  __v4su __t;

  __t[0] = __permute_selectors[__element_selector_10];
  __t[1] = __permute_selectors[__element_selector_32];
  /* +0x10101010 points these selectors at the second vec_perm operand;
     since both operands below are __A, the selected data is identical.  */
  __t[2] = __permute_selectors[__element_selector_54] + 0x10101010;
  __t[3] = __permute_selectors[__element_selector_76] + 0x10101010;
  return (__m128i)vec_perm((__v4si)__A, (__v4si)__A,
                           (__vector unsigned char)__t);
}
20900b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maskmoveu_si128(__m128i __A, __m128i __B, char *__C) {
  /* Conditionally store bytes of __A to the (possibly unaligned) 16-byte
     buffer at __C: a byte is written only where the corresponding byte of
     the mask __B has its high bit set.  Emulated as a non-atomic
     load/select/store of the whole 16 bytes, so untouched bytes are
     rewritten with their existing values.  */
  __v2du __hibit = {0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
  __v16qu __mask, __tmp;
  __m128i_u *__p = (__m128i_u *)__C;

  __tmp = (__v16qu)_mm_loadu_si128(__p);
  /* byte > 0x7f  <=>  high bit of the mask byte is set.  */
  __mask = (__v16qu)vec_cmpgt((__v16qu)__B, (__v16qu)__hibit);
  __tmp = vec_sel(__tmp, (__v16qu)__A, __mask);
  _mm_storeu_si128(__p, (__m128i)__tmp);
}
21030b57cec5SDimitry Andric 
210481ad6265SDimitry Andric extern __inline __m128i
210581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu8(__m128i __A,__m128i __B)210681ad6265SDimitry Andric     _mm_avg_epu8(__m128i __A, __m128i __B) {
21070b57cec5SDimitry Andric   return (__m128i)vec_avg((__v16qu)__A, (__v16qu)__B);
21080b57cec5SDimitry Andric }
21090b57cec5SDimitry Andric 
211081ad6265SDimitry Andric extern __inline __m128i
211181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu16(__m128i __A,__m128i __B)211281ad6265SDimitry Andric     _mm_avg_epu16(__m128i __A, __m128i __B) {
21130b57cec5SDimitry Andric   return (__m128i)vec_avg((__v8hu)__A, (__v8hu)__B);
21140b57cec5SDimitry Andric }
21150b57cec5SDimitry Andric 
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sad_epu8(__m128i __A, __m128i __B) {
  /* Sum of absolute differences of unsigned bytes: for each 8-byte half,
     sum |__A[i] - __B[i]| and place the 16-bit total zero-extended into
     the corresponding 64-bit lane of the result.  */
  __v16qu __a, __b;
  __v16qu __vabsdiff;
  __v4si __vsum;
  const __v4su __zero = {0, 0, 0, 0};
  __v4si __result;

  __a = (__v16qu)__A;
  __b = (__v16qu)__B;
#ifndef _ARCH_PWR9
  /* Pre-POWER9: |a - b| computed as max(a,b) - min(a,b).  */
  __v16qu __vmin = vec_min(__a, __b);
  __v16qu __vmax = vec_max(__a, __b);
  __vabsdiff = vec_sub(__vmax, __vmin);
#else
  /* POWER9 has a direct absolute-difference instruction.  */
  __vabsdiff = vec_absd(__a, __b);
#endif
  /* Sum four groups of bytes into integers.  */
  __vsum = (__vector signed int)vec_sum4s(__vabsdiff, __zero);
#ifdef __LITTLE_ENDIAN__
  /* Sum across four integers with two integer results.  */
  __asm__("vsum2sws %0,%1,%2" : "=v"(__result) : "v"(__vsum), "v"(__zero));
  /* Note: vec_sum2s could be used here, but on little-endian, vector
     shifts are added that are not needed for this use-case.
     A vector shift to correctly position the 32-bit integer results
     (currently at [0] and [2]) to [1] and [3] would then need to be
     swapped back again since the desired results are two 64-bit
     integers ([1]|[0] and [3]|[2]).  Thus, no shift is performed.  */
#else
  /* Sum across four integers with two integer results.  */
  __result = vec_sum2s(__vsum, (__vector signed int)__zero);
  /* Rotate the sums into the correct position.  */
  __result = vec_sld(__result, __result, 6);
#endif
  return (__m128i)__result;
}
21530b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si32(int *__A, int __B) {
  /* Non-temporal 32-bit store.  PowerPC has no true streaming store, so
     approximate the cache-bypass hint with dcbtstt (data cache block touch
     for store, transient) before a normal store.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}
21610b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si64(long long int *__A, long long int __B) {
  /* Non-temporal 64-bit store, emulated like the other _mm_stream_*
     wrappers: issue a transient store-touch cache hint, then store
     normally.  */
  /* Use the data cache block touch for store transient.  */
  /* Asm template normalized to "dcbtstt 0,%0" (the embedded tabs in the
     previous template were inconsistent with the sibling stream
     functions; the emitted instruction is identical).  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}
21690b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_si128(__m128i *__A, __m128i __B) {
  /* Non-temporal 128-bit store, emulated with a transient store-touch
     cache hint followed by a normal aligned vector store.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *__A = __B;
}
21770b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_stream_pd(double *__A, __m128d __B) {
  /* Non-temporal store of two doubles.  __A must be 16-byte aligned (SSE2
     contract); the double* is stored through as a whole __m128d.  */
  __asm__("dcbtstt 0,%0" : : "b"(__A) : "memory");
  *(__m128d *)__A = __B;
}
21850b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_clflush(void const *__A) {
  /* Flush the cache line containing __A using dcbf (data cache block
     flush), the PowerPC analogue of x86 CLFLUSH.  */
  __asm__("dcbf 0,%0" : : "b"(__A) : "memory");
}
21920b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_lfence(void) {
  /* Load fence.  Use light weight sync for load to load ordering.
     NOTE(review): __ATOMIC_RELEASE maps to lwsync on POWER, which also
     orders prior loads against later loads — presumably why it is used
     here despite the acquire-flavored intent; confirm against the GCC
     powerpc port before changing.  */
  __atomic_thread_fence(__ATOMIC_RELEASE);
}
21990b57cec5SDimitry Andric 
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mfence(void) {
  /* Full memory fence: heavy weight sync for any-to-any ordering, the
     strongest barrier available (matches x86 MFENCE semantics).  */
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
}
22060b57cec5SDimitry Andric 
220781ad6265SDimitry Andric extern __inline __m128i
220881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si128(int __A)220981ad6265SDimitry Andric     _mm_cvtsi32_si128(int __A) {
22100b57cec5SDimitry Andric   return _mm_set_epi32(0, 0, 0, __A);
22110b57cec5SDimitry Andric }
22120b57cec5SDimitry Andric 
221381ad6265SDimitry Andric extern __inline __m128i
221481ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si128(long long __A)221581ad6265SDimitry Andric     _mm_cvtsi64_si128(long long __A) {
22160b57cec5SDimitry Andric   return __extension__(__m128i)(__v2di){__A, 0LL};
22170b57cec5SDimitry Andric }
22180b57cec5SDimitry Andric 
22190b57cec5SDimitry Andric /* Microsoft intrinsic.  */
222081ad6265SDimitry Andric extern __inline __m128i
222181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si128(long long __A)222281ad6265SDimitry Andric     _mm_cvtsi64x_si128(long long __A) {
22230b57cec5SDimitry Andric   return __extension__(__m128i)(__v2di){__A, 0LL};
22240b57cec5SDimitry Andric }
22250b57cec5SDimitry Andric 
22260b57cec5SDimitry Andric /* Casts between various SP, DP, INT vector types.  Note that these do no
22270b57cec5SDimitry Andric    conversion of values, they just change the type.  */
222881ad6265SDimitry Andric extern __inline __m128
222981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)223081ad6265SDimitry Andric     _mm_castpd_ps(__m128d __A) {
22310b57cec5SDimitry Andric   return (__m128)__A;
22320b57cec5SDimitry Andric }
22330b57cec5SDimitry Andric 
223481ad6265SDimitry Andric extern __inline __m128i
223581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)223681ad6265SDimitry Andric     _mm_castpd_si128(__m128d __A) {
22370b57cec5SDimitry Andric   return (__m128i)__A;
22380b57cec5SDimitry Andric }
22390b57cec5SDimitry Andric 
224081ad6265SDimitry Andric extern __inline __m128d
224181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)224281ad6265SDimitry Andric     _mm_castps_pd(__m128 __A) {
22430b57cec5SDimitry Andric   return (__m128d)__A;
22440b57cec5SDimitry Andric }
22450b57cec5SDimitry Andric 
224681ad6265SDimitry Andric extern __inline __m128i
224781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)224881ad6265SDimitry Andric     _mm_castps_si128(__m128 __A) {
22490b57cec5SDimitry Andric   return (__m128i)__A;
22500b57cec5SDimitry Andric }
22510b57cec5SDimitry Andric 
225281ad6265SDimitry Andric extern __inline __m128
225381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)225481ad6265SDimitry Andric     _mm_castsi128_ps(__m128i __A) {
22550b57cec5SDimitry Andric   return (__m128)__A;
22560b57cec5SDimitry Andric }
22570b57cec5SDimitry Andric 
225881ad6265SDimitry Andric extern __inline __m128d
225981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)226081ad6265SDimitry Andric     _mm_castsi128_pd(__m128i __A) {
22610b57cec5SDimitry Andric   return (__m128d)__A;
22620b57cec5SDimitry Andric }
22630b57cec5SDimitry Andric 
2264a7dea167SDimitry Andric #else
2265a7dea167SDimitry Andric #include_next <emmintrin.h>
2266bdd1243dSDimitry Andric #endif /* defined(__powerpc64__) &&                                            \
2267fcaf7f86SDimitry Andric         *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
2268a7dea167SDimitry Andric 
22690b57cec5SDimitry Andric #endif /* EMMINTRIN_H_ */
2270