xref: /freebsd/contrib/llvm-project/clang/lib/Headers/ppc_wrappers/smmintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1a7dea167SDimitry Andric /*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
2a7dea167SDimitry Andric  *
3a7dea167SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a7dea167SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
5a7dea167SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a7dea167SDimitry Andric  *
7a7dea167SDimitry Andric  *===-----------------------------------------------------------------------===
8a7dea167SDimitry Andric  */
9a7dea167SDimitry Andric 
10a7dea167SDimitry Andric /* Implemented from the specification included in the Intel C++ Compiler
11a7dea167SDimitry Andric    User Guide and Reference, version 9.0.
12a7dea167SDimitry Andric 
13a7dea167SDimitry Andric    NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */
14a7dea167SDimitry Andric 
15a7dea167SDimitry Andric #ifndef NO_WARN_X86_INTRINSICS
16a7dea167SDimitry Andric /* This header is distributed to simplify porting x86_64 code that
17*5f757f3fSDimitry Andric    makes explicit use of Intel intrinsics to powerpc64/powerpc64le.
18a7dea167SDimitry Andric 
19a7dea167SDimitry Andric    It is the user's responsibility to determine if the results are
20a7dea167SDimitry Andric    acceptable and make additional changes as necessary.
21a7dea167SDimitry Andric 
22a7dea167SDimitry Andric    Note that much code that uses Intel intrinsics can be rewritten in
23a7dea167SDimitry Andric    standard C or GNU C extensions, which are more portable and better
24a7dea167SDimitry Andric    optimized across multiple targets.  */
25a7dea167SDimitry Andric #error                                                                         \
26a7dea167SDimitry Andric     "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
27a7dea167SDimitry Andric #endif
28a7dea167SDimitry Andric 
29a7dea167SDimitry Andric #ifndef SMMINTRIN_H_
30a7dea167SDimitry Andric #define SMMINTRIN_H_
31a7dea167SDimitry Andric 
32bdd1243dSDimitry Andric #if defined(__powerpc64__) &&                                                  \
33fcaf7f86SDimitry Andric     (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
34a7dea167SDimitry Andric 
35a7dea167SDimitry Andric #include <altivec.h>
36349cc55cSDimitry Andric #include <tmmintrin.h>
37a7dea167SDimitry Andric 
/* Rounding-control selectors accepted by the _mm_round_* intrinsics.  */

/* Exception behaviour flags (OR'ed into a direction selector).  */
#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

/* Rounding directions.  */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Convenience combinations of a direction and an exception flag.  */
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
5481ad6265SDimitry Andric 
5581ad6265SDimitry Andric extern __inline __m128d
5681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5781ad6265SDimitry Andric     _mm_round_pd(__m128d __A, int __rounding) {
5881ad6265SDimitry Andric   __v2df __r;
5981ad6265SDimitry Andric   union {
6081ad6265SDimitry Andric     double __fr;
6181ad6265SDimitry Andric     long long __fpscr;
6281ad6265SDimitry Andric   } __enables_save, __fpscr_save;
6381ad6265SDimitry Andric 
6481ad6265SDimitry Andric   if (__rounding & _MM_FROUND_NO_EXC) {
6581ad6265SDimitry Andric     /* Save enabled exceptions, disable all exceptions,
6681ad6265SDimitry Andric        and preserve the rounding mode.  */
6781ad6265SDimitry Andric #ifdef _ARCH_PWR9
6881ad6265SDimitry Andric     __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
6981ad6265SDimitry Andric     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
7081ad6265SDimitry Andric #else
71*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
7281ad6265SDimitry Andric     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
7381ad6265SDimitry Andric     __fpscr_save.__fpscr &= ~0xf8;
74*5f757f3fSDimitry Andric     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
7581ad6265SDimitry Andric #endif
7681ad6265SDimitry Andric     /* Insert an artificial "read/write" reference to the variable
7781ad6265SDimitry Andric        read below, to ensure the compiler does not schedule
7881ad6265SDimitry Andric        a read/use of the variable before the FPSCR is modified, above.
7981ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
8081ad6265SDimitry Andric      */
8181ad6265SDimitry Andric     __asm__("" : "+wa"(__A));
8281ad6265SDimitry Andric   }
8381ad6265SDimitry Andric 
8481ad6265SDimitry Andric   switch (__rounding) {
8581ad6265SDimitry Andric   case _MM_FROUND_TO_NEAREST_INT:
86*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9
87*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffsl();
88*5f757f3fSDimitry Andric #else
89*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
90*5f757f3fSDimitry Andric     __fpscr_save.__fpscr &= 0x70007f0ffL;
91*5f757f3fSDimitry Andric #endif
9281ad6265SDimitry Andric     __attribute__((fallthrough));
9381ad6265SDimitry Andric   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
94*5f757f3fSDimitry Andric     __builtin_ppc_set_fpscr_rn(0b00);
9581ad6265SDimitry Andric     /* Insert an artificial "read/write" reference to the variable
9681ad6265SDimitry Andric        read below, to ensure the compiler does not schedule
9781ad6265SDimitry Andric        a read/use of the variable before the FPSCR is modified, above.
9881ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
9981ad6265SDimitry Andric      */
10081ad6265SDimitry Andric     __asm__("" : "+wa"(__A));
10181ad6265SDimitry Andric 
10281ad6265SDimitry Andric     __r = vec_rint((__v2df)__A);
10381ad6265SDimitry Andric 
10481ad6265SDimitry Andric     /* Insert an artificial "read" reference to the variable written
10581ad6265SDimitry Andric        above, to ensure the compiler does not schedule the computation
10681ad6265SDimitry Andric        of the value after the manipulation of the FPSCR, below.
10781ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
10881ad6265SDimitry Andric      */
10981ad6265SDimitry Andric     __asm__("" : : "wa"(__r));
110*5f757f3fSDimitry Andric     __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
11181ad6265SDimitry Andric     break;
11281ad6265SDimitry Andric   case _MM_FROUND_TO_NEG_INF:
11381ad6265SDimitry Andric   case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
11481ad6265SDimitry Andric     __r = vec_floor((__v2df)__A);
11581ad6265SDimitry Andric     break;
11681ad6265SDimitry Andric   case _MM_FROUND_TO_POS_INF:
11781ad6265SDimitry Andric   case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
11881ad6265SDimitry Andric     __r = vec_ceil((__v2df)__A);
11981ad6265SDimitry Andric     break;
12081ad6265SDimitry Andric   case _MM_FROUND_TO_ZERO:
12181ad6265SDimitry Andric   case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
12281ad6265SDimitry Andric     __r = vec_trunc((__v2df)__A);
12381ad6265SDimitry Andric     break;
12481ad6265SDimitry Andric   case _MM_FROUND_CUR_DIRECTION:
12581ad6265SDimitry Andric     __r = vec_rint((__v2df)__A);
12681ad6265SDimitry Andric     break;
12781ad6265SDimitry Andric   }
12881ad6265SDimitry Andric   if (__rounding & _MM_FROUND_NO_EXC) {
12981ad6265SDimitry Andric     /* Insert an artificial "read" reference to the variable written
13081ad6265SDimitry Andric        above, to ensure the compiler does not schedule the computation
13181ad6265SDimitry Andric        of the value after the manipulation of the FPSCR, below.
13281ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
13381ad6265SDimitry Andric      */
13481ad6265SDimitry Andric     __asm__("" : : "wa"(__r));
13581ad6265SDimitry Andric     /* Restore enabled exceptions.  */
136*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9
137*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffsl();
138*5f757f3fSDimitry Andric #else
139*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
140*5f757f3fSDimitry Andric     __fpscr_save.__fpscr &= 0x70007f0ffL;
141*5f757f3fSDimitry Andric #endif
14281ad6265SDimitry Andric     __fpscr_save.__fpscr |= __enables_save.__fpscr;
143*5f757f3fSDimitry Andric     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
14481ad6265SDimitry Andric   }
14581ad6265SDimitry Andric   return (__m128d)__r;
14681ad6265SDimitry Andric }
14781ad6265SDimitry Andric 
14881ad6265SDimitry Andric extern __inline __m128d
14981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
15081ad6265SDimitry Andric     _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
15181ad6265SDimitry Andric   __B = _mm_round_pd(__B, __rounding);
15281ad6265SDimitry Andric   __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
15381ad6265SDimitry Andric   return (__m128d)__r;
15481ad6265SDimitry Andric }
15581ad6265SDimitry Andric 
15681ad6265SDimitry Andric extern __inline __m128
15781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
15881ad6265SDimitry Andric     _mm_round_ps(__m128 __A, int __rounding) {
15981ad6265SDimitry Andric   __v4sf __r;
16081ad6265SDimitry Andric   union {
16181ad6265SDimitry Andric     double __fr;
16281ad6265SDimitry Andric     long long __fpscr;
16381ad6265SDimitry Andric   } __enables_save, __fpscr_save;
16481ad6265SDimitry Andric 
16581ad6265SDimitry Andric   if (__rounding & _MM_FROUND_NO_EXC) {
16681ad6265SDimitry Andric     /* Save enabled exceptions, disable all exceptions,
16781ad6265SDimitry Andric        and preserve the rounding mode.  */
16881ad6265SDimitry Andric #ifdef _ARCH_PWR9
16981ad6265SDimitry Andric     __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
17081ad6265SDimitry Andric     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
17181ad6265SDimitry Andric #else
172*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
17381ad6265SDimitry Andric     __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
17481ad6265SDimitry Andric     __fpscr_save.__fpscr &= ~0xf8;
175*5f757f3fSDimitry Andric     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
17681ad6265SDimitry Andric #endif
17781ad6265SDimitry Andric     /* Insert an artificial "read/write" reference to the variable
17881ad6265SDimitry Andric        read below, to ensure the compiler does not schedule
17981ad6265SDimitry Andric        a read/use of the variable before the FPSCR is modified, above.
18081ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
18181ad6265SDimitry Andric      */
18281ad6265SDimitry Andric     __asm__("" : "+wa"(__A));
18381ad6265SDimitry Andric   }
18481ad6265SDimitry Andric 
18581ad6265SDimitry Andric   switch (__rounding) {
18681ad6265SDimitry Andric   case _MM_FROUND_TO_NEAREST_INT:
187*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9
188*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffsl();
189*5f757f3fSDimitry Andric #else
190*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
191*5f757f3fSDimitry Andric     __fpscr_save.__fpscr &= 0x70007f0ffL;
192*5f757f3fSDimitry Andric #endif
19381ad6265SDimitry Andric     __attribute__((fallthrough));
19481ad6265SDimitry Andric   case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
195*5f757f3fSDimitry Andric     __builtin_ppc_set_fpscr_rn(0b00);
19681ad6265SDimitry Andric     /* Insert an artificial "read/write" reference to the variable
19781ad6265SDimitry Andric        read below, to ensure the compiler does not schedule
19881ad6265SDimitry Andric        a read/use of the variable before the FPSCR is modified, above.
19981ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
20081ad6265SDimitry Andric      */
20181ad6265SDimitry Andric     __asm__("" : "+wa"(__A));
20281ad6265SDimitry Andric 
20381ad6265SDimitry Andric     __r = vec_rint((__v4sf)__A);
20481ad6265SDimitry Andric 
20581ad6265SDimitry Andric     /* Insert an artificial "read" reference to the variable written
20681ad6265SDimitry Andric        above, to ensure the compiler does not schedule the computation
20781ad6265SDimitry Andric        of the value after the manipulation of the FPSCR, below.
20881ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
20981ad6265SDimitry Andric      */
21081ad6265SDimitry Andric     __asm__("" : : "wa"(__r));
211*5f757f3fSDimitry Andric     __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
21281ad6265SDimitry Andric     break;
21381ad6265SDimitry Andric   case _MM_FROUND_TO_NEG_INF:
21481ad6265SDimitry Andric   case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
21581ad6265SDimitry Andric     __r = vec_floor((__v4sf)__A);
21681ad6265SDimitry Andric     break;
21781ad6265SDimitry Andric   case _MM_FROUND_TO_POS_INF:
21881ad6265SDimitry Andric   case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
21981ad6265SDimitry Andric     __r = vec_ceil((__v4sf)__A);
22081ad6265SDimitry Andric     break;
22181ad6265SDimitry Andric   case _MM_FROUND_TO_ZERO:
22281ad6265SDimitry Andric   case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
22381ad6265SDimitry Andric     __r = vec_trunc((__v4sf)__A);
22481ad6265SDimitry Andric     break;
22581ad6265SDimitry Andric   case _MM_FROUND_CUR_DIRECTION:
22681ad6265SDimitry Andric     __r = vec_rint((__v4sf)__A);
22781ad6265SDimitry Andric     break;
22881ad6265SDimitry Andric   }
22981ad6265SDimitry Andric   if (__rounding & _MM_FROUND_NO_EXC) {
23081ad6265SDimitry Andric     /* Insert an artificial "read" reference to the variable written
23181ad6265SDimitry Andric        above, to ensure the compiler does not schedule the computation
23281ad6265SDimitry Andric        of the value after the manipulation of the FPSCR, below.
23381ad6265SDimitry Andric        This can be removed if and when GCC PR102783 is fixed.
23481ad6265SDimitry Andric      */
23581ad6265SDimitry Andric     __asm__("" : : "wa"(__r));
23681ad6265SDimitry Andric     /* Restore enabled exceptions.  */
237*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9
238*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffsl();
239*5f757f3fSDimitry Andric #else
240*5f757f3fSDimitry Andric     __fpscr_save.__fr = __builtin_ppc_mffs();
241*5f757f3fSDimitry Andric     __fpscr_save.__fpscr &= 0x70007f0ffL;
242*5f757f3fSDimitry Andric #endif
24381ad6265SDimitry Andric     __fpscr_save.__fpscr |= __enables_save.__fpscr;
244*5f757f3fSDimitry Andric     __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
24581ad6265SDimitry Andric   }
24681ad6265SDimitry Andric   return (__m128)__r;
24781ad6265SDimitry Andric }
24881ad6265SDimitry Andric 
24981ad6265SDimitry Andric extern __inline __m128
25081ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
25181ad6265SDimitry Andric     _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
25281ad6265SDimitry Andric   __B = _mm_round_ps(__B, __rounding);
25381ad6265SDimitry Andric   __v4sf __r = (__v4sf)__A;
25481ad6265SDimitry Andric   __r[0] = ((__v4sf)__B)[0];
25581ad6265SDimitry Andric   return (__m128)__r;
25681ad6265SDimitry Andric }
25781ad6265SDimitry Andric 
/* Ceiling and floor helpers: thin macros over the generic rounding
   intrinsics with the matching _MM_FROUND_* selector.  */

/* Packed and scalar double precision.  */
#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

/* Packed and scalar single precision.  */
#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
26981ad6265SDimitry Andric 
27081ad6265SDimitry Andric extern __inline __m128i
27181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
27281ad6265SDimitry Andric     _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
27381ad6265SDimitry Andric   __v16qi __result = (__v16qi)__A;
27481ad6265SDimitry Andric 
27581ad6265SDimitry Andric   __result[__N & 0xf] = __D;
27681ad6265SDimitry Andric 
27781ad6265SDimitry Andric   return (__m128i)__result;
27881ad6265SDimitry Andric }
27981ad6265SDimitry Andric 
28081ad6265SDimitry Andric extern __inline __m128i
28181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
28281ad6265SDimitry Andric     _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
28381ad6265SDimitry Andric   __v4si __result = (__v4si)__A;
28481ad6265SDimitry Andric 
28581ad6265SDimitry Andric   __result[__N & 3] = __D;
28681ad6265SDimitry Andric 
28781ad6265SDimitry Andric   return (__m128i)__result;
28881ad6265SDimitry Andric }
28981ad6265SDimitry Andric 
29081ad6265SDimitry Andric extern __inline __m128i
29181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
29281ad6265SDimitry Andric     _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
29381ad6265SDimitry Andric   __v2di __result = (__v2di)__A;
29481ad6265SDimitry Andric 
29581ad6265SDimitry Andric   __result[__N & 1] = __D;
29681ad6265SDimitry Andric 
29781ad6265SDimitry Andric   return (__m128i)__result;
29881ad6265SDimitry Andric }
29981ad6265SDimitry Andric 
300a7dea167SDimitry Andric extern __inline int
301a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
302a7dea167SDimitry Andric     _mm_extract_epi8(__m128i __X, const int __N) {
303a7dea167SDimitry Andric   return (unsigned char)((__v16qi)__X)[__N & 15];
304a7dea167SDimitry Andric }
305a7dea167SDimitry Andric 
306a7dea167SDimitry Andric extern __inline int
307a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308a7dea167SDimitry Andric     _mm_extract_epi32(__m128i __X, const int __N) {
309a7dea167SDimitry Andric   return ((__v4si)__X)[__N & 3];
310a7dea167SDimitry Andric }
311a7dea167SDimitry Andric 
312a7dea167SDimitry Andric extern __inline int
313a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
314a7dea167SDimitry Andric     _mm_extract_epi64(__m128i __X, const int __N) {
315a7dea167SDimitry Andric   return ((__v2di)__X)[__N & 1];
316a7dea167SDimitry Andric }
317a7dea167SDimitry Andric 
318a7dea167SDimitry Andric extern __inline int
319a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
320a7dea167SDimitry Andric     _mm_extract_ps(__m128 __X, const int __N) {
321a7dea167SDimitry Andric   return ((__v4si)__X)[__N & 3];
322a7dea167SDimitry Andric }
323a7dea167SDimitry Andric 
32481ad6265SDimitry Andric #ifdef _ARCH_PWR8
325a7dea167SDimitry Andric extern __inline __m128i
326a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
327a7dea167SDimitry Andric     _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
32806c3fb27SDimitry Andric   __v16qu __charmask = vec_splats((unsigned char)__imm8);
329a7dea167SDimitry Andric   __charmask = vec_gb(__charmask);
33006c3fb27SDimitry Andric   __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask);
331a7dea167SDimitry Andric #ifdef __BIG_ENDIAN__
332a7dea167SDimitry Andric   __shortmask = vec_reve(__shortmask);
333a7dea167SDimitry Andric #endif
334a7dea167SDimitry Andric   return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
335a7dea167SDimitry Andric }
33681ad6265SDimitry Andric #endif
337a7dea167SDimitry Andric 
338a7dea167SDimitry Andric extern __inline __m128i
339a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
340a7dea167SDimitry Andric     _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
34181ad6265SDimitry Andric #ifdef _ARCH_PWR10
34281ad6265SDimitry Andric   return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
34381ad6265SDimitry Andric #else
344a7dea167SDimitry Andric   const __v16qu __seven = vec_splats((unsigned char)0x07);
345a7dea167SDimitry Andric   __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
34681ad6265SDimitry Andric   return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
34781ad6265SDimitry Andric #endif
34881ad6265SDimitry Andric }
34981ad6265SDimitry Andric 
35081ad6265SDimitry Andric extern __inline __m128
35181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
35281ad6265SDimitry Andric     _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
35381ad6265SDimitry Andric   __v16qu __pcv[] = {
35481ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
35581ad6265SDimitry Andric       {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
35681ad6265SDimitry Andric       {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
35781ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
35881ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
35981ad6265SDimitry Andric       {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
36081ad6265SDimitry Andric       {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
36181ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
36281ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
36381ad6265SDimitry Andric       {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
36481ad6265SDimitry Andric       {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
36581ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
36681ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
36781ad6265SDimitry Andric       {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
36881ad6265SDimitry Andric       {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
36981ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
37081ad6265SDimitry Andric   };
37181ad6265SDimitry Andric   __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
37281ad6265SDimitry Andric   return (__m128)__r;
37381ad6265SDimitry Andric }
37481ad6265SDimitry Andric 
37581ad6265SDimitry Andric extern __inline __m128
37681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
37781ad6265SDimitry Andric     _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
37881ad6265SDimitry Andric #ifdef _ARCH_PWR10
37981ad6265SDimitry Andric   return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
38081ad6265SDimitry Andric #else
38181ad6265SDimitry Andric   const __v4si __zero = {0};
38281ad6265SDimitry Andric   const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
38381ad6265SDimitry Andric   return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
38481ad6265SDimitry Andric #endif
38581ad6265SDimitry Andric }
38681ad6265SDimitry Andric 
38781ad6265SDimitry Andric extern __inline __m128d
38881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
38981ad6265SDimitry Andric     _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
39081ad6265SDimitry Andric   __v16qu __pcv[] = {
39181ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
39281ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
39381ad6265SDimitry Andric       {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
39481ad6265SDimitry Andric       {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
39581ad6265SDimitry Andric   __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
39681ad6265SDimitry Andric   return (__m128d)__r;
39781ad6265SDimitry Andric }
39881ad6265SDimitry Andric 
39981ad6265SDimitry Andric #ifdef _ARCH_PWR8
40081ad6265SDimitry Andric extern __inline __m128d
40181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
40281ad6265SDimitry Andric     _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
40381ad6265SDimitry Andric #ifdef _ARCH_PWR10
40481ad6265SDimitry Andric   return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
40581ad6265SDimitry Andric #else
40681ad6265SDimitry Andric   const __v2di __zero = {0};
40781ad6265SDimitry Andric   const __vector __bool long long __boolmask =
40881ad6265SDimitry Andric       vec_cmplt((__v2di)__mask, __zero);
40981ad6265SDimitry Andric   return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
41081ad6265SDimitry Andric #endif
41181ad6265SDimitry Andric }
41281ad6265SDimitry Andric #endif
41381ad6265SDimitry Andric 
41481ad6265SDimitry Andric extern __inline int
41581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
41681ad6265SDimitry Andric     _mm_testz_si128(__m128i __A, __m128i __B) {
41781ad6265SDimitry Andric   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
41881ad6265SDimitry Andric   const __v16qu __zero = {0};
41981ad6265SDimitry Andric   return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
42081ad6265SDimitry Andric }
42181ad6265SDimitry Andric 
42281ad6265SDimitry Andric extern __inline int
42381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
42481ad6265SDimitry Andric     _mm_testc_si128(__m128i __A, __m128i __B) {
42581ad6265SDimitry Andric   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
42681ad6265SDimitry Andric   const __v16qu __zero = {0};
42781ad6265SDimitry Andric   const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
42881ad6265SDimitry Andric   return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
42981ad6265SDimitry Andric }
43081ad6265SDimitry Andric 
43181ad6265SDimitry Andric extern __inline int
43281ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
43381ad6265SDimitry Andric     _mm_testnzc_si128(__m128i __A, __m128i __B) {
43481ad6265SDimitry Andric   /* Note: This implementation does NOT set "zero" or "carry" flags.  */
43581ad6265SDimitry Andric   return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
43681ad6265SDimitry Andric }
43781ad6265SDimitry Andric 
/* Convenience spellings built on the _mm_test*_si128 intrinsics.  */

/* Nonzero when (M & V) is all zeros.  */
#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

/* Mixed-bits test; forwards to _mm_testnzc_si128.  */
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))

/* Tests V against an all-ones vector produced by comparing V with itself.
   Note that V is expanded more than once.  */
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
44381ad6265SDimitry Andric 
44481ad6265SDimitry Andric #ifdef _ARCH_PWR8
44581ad6265SDimitry Andric extern __inline __m128i
44681ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
44781ad6265SDimitry Andric     _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
44881ad6265SDimitry Andric   return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
44981ad6265SDimitry Andric }
45081ad6265SDimitry Andric #endif
45181ad6265SDimitry Andric 
45281ad6265SDimitry Andric extern __inline __m128i
45381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
45481ad6265SDimitry Andric     _mm_min_epi8(__m128i __X, __m128i __Y) {
45581ad6265SDimitry Andric   return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
456a7dea167SDimitry Andric }
457a7dea167SDimitry Andric 
458e8d8bef9SDimitry Andric extern __inline __m128i
459e8d8bef9SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
46081ad6265SDimitry Andric     _mm_min_epu16(__m128i __X, __m128i __Y) {
46181ad6265SDimitry Andric   return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
462e8d8bef9SDimitry Andric }
463e8d8bef9SDimitry Andric 
464e8d8bef9SDimitry Andric extern __inline __m128i
465e8d8bef9SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
46681ad6265SDimitry Andric     _mm_min_epi32(__m128i __X, __m128i __Y) {
46781ad6265SDimitry Andric   return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
468e8d8bef9SDimitry Andric }
469e8d8bef9SDimitry Andric 
470e8d8bef9SDimitry Andric extern __inline __m128i
471e8d8bef9SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47281ad6265SDimitry Andric     _mm_min_epu32(__m128i __X, __m128i __Y) {
47381ad6265SDimitry Andric   return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
474e8d8bef9SDimitry Andric }
475e8d8bef9SDimitry Andric 
47681ad6265SDimitry Andric extern __inline __m128i
47781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47881ad6265SDimitry Andric     _mm_max_epi8(__m128i __X, __m128i __Y) {
47981ad6265SDimitry Andric   return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
48081ad6265SDimitry Andric }
48181ad6265SDimitry Andric 
48281ad6265SDimitry Andric extern __inline __m128i
48381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
48481ad6265SDimitry Andric     _mm_max_epu16(__m128i __X, __m128i __Y) {
48581ad6265SDimitry Andric   return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
48681ad6265SDimitry Andric }
48781ad6265SDimitry Andric 
48881ad6265SDimitry Andric extern __inline __m128i
48981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
49081ad6265SDimitry Andric     _mm_max_epi32(__m128i __X, __m128i __Y) {
49181ad6265SDimitry Andric   return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
49281ad6265SDimitry Andric }
49381ad6265SDimitry Andric 
49481ad6265SDimitry Andric extern __inline __m128i
49581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
49681ad6265SDimitry Andric     _mm_max_epu32(__m128i __X, __m128i __Y) {
49781ad6265SDimitry Andric   return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
49881ad6265SDimitry Andric }
49981ad6265SDimitry Andric 
50081ad6265SDimitry Andric extern __inline __m128i
50181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
50281ad6265SDimitry Andric     _mm_mullo_epi32(__m128i __X, __m128i __Y) {
50381ad6265SDimitry Andric   return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
50481ad6265SDimitry Andric }
50581ad6265SDimitry Andric 
#ifdef _ARCH_PWR8
/* Widening multiply: both operands are viewed as __v4si and passed to
   vec_mule; the result is cast back to __m128i.  Only available when
   _ARCH_PWR8 is defined.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  __v4si __lhs = (__v4si)__X;
  __v4si __rhs = (__v4si)__Y;
  return (__m128i)vec_mule(__lhs, __rhs);
}
#endif
51381ad6265SDimitry Andric 
51481ad6265SDimitry Andric extern __inline __m128i
51581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
51681ad6265SDimitry Andric     _mm_cvtepi8_epi16(__m128i __A) {
51781ad6265SDimitry Andric   return (__m128i)vec_unpackh((__v16qi)__A);
51881ad6265SDimitry Andric }
51981ad6265SDimitry Andric 
52081ad6265SDimitry Andric extern __inline __m128i
52181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52281ad6265SDimitry Andric     _mm_cvtepi8_epi32(__m128i __A) {
52381ad6265SDimitry Andric   __A = (__m128i)vec_unpackh((__v16qi)__A);
52481ad6265SDimitry Andric   return (__m128i)vec_unpackh((__v8hi)__A);
52581ad6265SDimitry Andric }
52681ad6265SDimitry Andric 
#ifdef _ARCH_PWR8
/* Widen 8-bit lanes to 64 bits in three vec_unpackh steps:
   __v16qi -> __v8hi -> __v4si -> 64-bit lanes.  Only available when
   _ARCH_PWR8 is defined.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __m128i __wide16 = (__m128i)vec_unpackh((__v16qi)__A);
  __m128i __wide32 = (__m128i)vec_unpackh((__v8hi)__wide16);
  return (__m128i)vec_unpackh((__v4si)__wide32);
}
#endif
53681ad6265SDimitry Andric 
53781ad6265SDimitry Andric extern __inline __m128i
53881ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
53981ad6265SDimitry Andric     _mm_cvtepi16_epi32(__m128i __A) {
54081ad6265SDimitry Andric   return (__m128i)vec_unpackh((__v8hi)__A);
54181ad6265SDimitry Andric }
54281ad6265SDimitry Andric 
#ifdef _ARCH_PWR8
/* Widen 16-bit lanes to 64 bits in two vec_unpackh steps:
   __v8hi -> __v4si -> 64-bit lanes.  Only available when _ARCH_PWR8 is
   defined.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __m128i __wide32 = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__wide32);
}
#endif
55181ad6265SDimitry Andric 
#ifdef _ARCH_PWR8
/* Widen 32-bit lanes to 64 bits: __A is viewed as __v4si and passed
   through a single vec_unpackh.  Only available when _ARCH_PWR8 is
   defined.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  __v4si __words = (__v4si)__A;
  return (__m128i)vec_unpackh(__words);
}
#endif
55981ad6265SDimitry Andric 
56081ad6265SDimitry Andric extern __inline __m128i
56181ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
56281ad6265SDimitry Andric     _mm_cvtepu8_epi16(__m128i __A) {
56381ad6265SDimitry Andric   const __v16qu __zero = {0};
56481ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
56581ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
56681ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
56781ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
56881ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
56981ad6265SDimitry Andric   return __A;
57081ad6265SDimitry Andric }
57181ad6265SDimitry Andric 
57281ad6265SDimitry Andric extern __inline __m128i
57381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57481ad6265SDimitry Andric     _mm_cvtepu8_epi32(__m128i __A) {
57581ad6265SDimitry Andric   const __v16qu __zero = {0};
57681ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
57781ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
57881ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
57981ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
58081ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
58181ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
58281ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
58381ad6265SDimitry Andric   return __A;
58481ad6265SDimitry Andric }
58581ad6265SDimitry Andric 
58681ad6265SDimitry Andric extern __inline __m128i
58781ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
58881ad6265SDimitry Andric     _mm_cvtepu8_epi64(__m128i __A) {
58981ad6265SDimitry Andric   const __v16qu __zero = {0};
59081ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
59181ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
59281ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
59381ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
59481ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
59581ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
59681ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
59781ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
59881ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
59981ad6265SDimitry Andric   return __A;
60081ad6265SDimitry Andric }
60181ad6265SDimitry Andric 
60281ad6265SDimitry Andric extern __inline __m128i
60381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
60481ad6265SDimitry Andric     _mm_cvtepu16_epi32(__m128i __A) {
60581ad6265SDimitry Andric   const __v8hu __zero = {0};
60681ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
60781ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
60881ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
60981ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
61081ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
61181ad6265SDimitry Andric   return __A;
61281ad6265SDimitry Andric }
61381ad6265SDimitry Andric 
61481ad6265SDimitry Andric extern __inline __m128i
61581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
61681ad6265SDimitry Andric     _mm_cvtepu16_epi64(__m128i __A) {
61781ad6265SDimitry Andric   const __v8hu __zero = {0};
61881ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
61981ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
62081ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
62181ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
62281ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
62381ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
62481ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
62581ad6265SDimitry Andric   return __A;
62681ad6265SDimitry Andric }
62781ad6265SDimitry Andric 
62881ad6265SDimitry Andric extern __inline __m128i
62981ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63081ad6265SDimitry Andric     _mm_cvtepu32_epi64(__m128i __A) {
63181ad6265SDimitry Andric   const __v4su __zero = {0};
63281ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__
63381ad6265SDimitry Andric   __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
63481ad6265SDimitry Andric #else  /* __BIG_ENDIAN__.  */
63581ad6265SDimitry Andric   __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
63681ad6265SDimitry Andric #endif /* __BIG_ENDIAN__.  */
63781ad6265SDimitry Andric   return __A;
63881ad6265SDimitry Andric }
63981ad6265SDimitry Andric 
64081ad6265SDimitry Andric /* Return horizontal packed word minimum and its index in bits [15:0]
64181ad6265SDimitry Andric    and bits [18:16] respectively.  */
64281ad6265SDimitry Andric extern __inline __m128i
64381ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64481ad6265SDimitry Andric     _mm_minpos_epu16(__m128i __A) {
64581ad6265SDimitry Andric   union __u {
64681ad6265SDimitry Andric     __m128i __m;
64781ad6265SDimitry Andric     __v8hu __uh;
64881ad6265SDimitry Andric   };
64981ad6265SDimitry Andric   union __u __u = {.__m = __A}, __r = {.__m = {0}};
65081ad6265SDimitry Andric   unsigned short __ridx = 0;
65181ad6265SDimitry Andric   unsigned short __rmin = __u.__uh[__ridx];
65281ad6265SDimitry Andric   unsigned long __i;
65381ad6265SDimitry Andric   for (__i = 1; __i < 8; __i++) {
65481ad6265SDimitry Andric     if (__u.__uh[__i] < __rmin) {
65581ad6265SDimitry Andric       __rmin = __u.__uh[__i];
65681ad6265SDimitry Andric       __ridx = __i;
65781ad6265SDimitry Andric     }
65881ad6265SDimitry Andric   }
65981ad6265SDimitry Andric   __r.__uh[0] = __rmin;
66081ad6265SDimitry Andric   __r.__uh[1] = __ridx;
66181ad6265SDimitry Andric   return __r.__m;
66281ad6265SDimitry Andric }
66381ad6265SDimitry Andric 
66481ad6265SDimitry Andric extern __inline __m128i
66581ad6265SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
66681ad6265SDimitry Andric     _mm_packus_epi32(__m128i __X, __m128i __Y) {
66781ad6265SDimitry Andric   return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
66881ad6265SDimitry Andric }
66981ad6265SDimitry Andric 
#ifdef _ARCH_PWR8
/* Lane-wise greater-than: both operands are viewed as __v2di and
   handed to vec_cmpgt; the comparison result is cast back to __m128i.
   Only available when _ARCH_PWR8 is defined.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  __v2di __lhs = (__v2di)__X;
  __v2di __rhs = (__v2di)__Y;
  return (__m128i)vec_cmpgt(__lhs, __rhs);
}
#endif
67781ad6265SDimitry Andric 
678a7dea167SDimitry Andric #else
679a7dea167SDimitry Andric #include_next <smmintrin.h>
680bdd1243dSDimitry Andric #endif /* defined(__powerpc64__) &&                                            \
681fcaf7f86SDimitry Andric         *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
682a7dea167SDimitry Andric 
68381ad6265SDimitry Andric #endif /* SMMINTRIN_H_ */
684