1a7dea167SDimitry Andric /*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== 2a7dea167SDimitry Andric * 3a7dea167SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a7dea167SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 5a7dea167SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a7dea167SDimitry Andric * 7a7dea167SDimitry Andric *===-----------------------------------------------------------------------=== 8a7dea167SDimitry Andric */ 9a7dea167SDimitry Andric 10a7dea167SDimitry Andric /* Implemented from the specification included in the Intel C++ Compiler 11a7dea167SDimitry Andric User Guide and Reference, version 9.0. 12a7dea167SDimitry Andric 13a7dea167SDimitry Andric NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */ 14a7dea167SDimitry Andric 15a7dea167SDimitry Andric #ifndef NO_WARN_X86_INTRINSICS 16a7dea167SDimitry Andric /* This header is distributed to simplify porting x86_64 code that 17*5f757f3fSDimitry Andric makes explicit use of Intel intrinsics to powerpc64/powerpc64le. 18a7dea167SDimitry Andric 19a7dea167SDimitry Andric It is the user's responsibility to determine if the results are 20a7dea167SDimitry Andric acceptable and make additional changes as necessary. 21a7dea167SDimitry Andric 22a7dea167SDimitry Andric Note that much code that uses Intel intrinsics can be rewritten in 23a7dea167SDimitry Andric standard C or GNU C extensions, which are more portable and better 24a7dea167SDimitry Andric optimized across multiple targets. */ 25a7dea167SDimitry Andric #error \ 26a7dea167SDimitry Andric "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 27a7dea167SDimitry Andric #endif 28a7dea167SDimitry Andric 29a7dea167SDimitry Andric #ifndef SMMINTRIN_H_ 30a7dea167SDimitry Andric #define SMMINTRIN_H_ 31a7dea167SDimitry Andric 32bdd1243dSDimitry Andric #if defined(__powerpc64__) && \ 33fcaf7f86SDimitry Andric (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) 34a7dea167SDimitry Andric 35a7dea167SDimitry Andric #include <altivec.h> 36349cc55cSDimitry Andric #include <tmmintrin.h> 37a7dea167SDimitry Andric 3881ad6265SDimitry Andric /* Rounding mode macros. */ 3981ad6265SDimitry Andric #define _MM_FROUND_TO_NEAREST_INT 0x00 4081ad6265SDimitry Andric #define _MM_FROUND_TO_ZERO 0x01 4181ad6265SDimitry Andric #define _MM_FROUND_TO_POS_INF 0x02 4281ad6265SDimitry Andric #define _MM_FROUND_TO_NEG_INF 0x03 4381ad6265SDimitry Andric #define _MM_FROUND_CUR_DIRECTION 0x04 4481ad6265SDimitry Andric 4581ad6265SDimitry Andric #define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) 4681ad6265SDimitry Andric #define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) 4781ad6265SDimitry Andric #define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) 4881ad6265SDimitry Andric #define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) 4981ad6265SDimitry Andric #define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) 5081ad6265SDimitry Andric #define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) 5181ad6265SDimitry Andric 5281ad6265SDimitry Andric #define _MM_FROUND_RAISE_EXC 0x00 5381ad6265SDimitry Andric #define _MM_FROUND_NO_EXC 0x08 5481ad6265SDimitry Andric 5581ad6265SDimitry Andric extern __inline __m128d 5681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 5781ad6265SDimitry Andric _mm_round_pd(__m128d __A, int __rounding) { 5881ad6265SDimitry Andric __v2df __r; 5981ad6265SDimitry Andric union { 6081ad6265SDimitry Andric double __fr; 6181ad6265SDimitry Andric long long __fpscr; 6281ad6265SDimitry Andric } __enables_save, __fpscr_save; 6381ad6265SDimitry Andric 6481ad6265SDimitry Andric if (__rounding & _MM_FROUND_NO_EXC) { 6581ad6265SDimitry Andric /* Save enabled exceptions, disable all exceptions, 6681ad6265SDimitry Andric and preserve the rounding mode. */ 6781ad6265SDimitry Andric #ifdef _ARCH_PWR9 6881ad6265SDimitry Andric __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); 6981ad6265SDimitry Andric __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; 7081ad6265SDimitry Andric #else 71*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 7281ad6265SDimitry Andric __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; 7381ad6265SDimitry Andric __fpscr_save.__fpscr &= ~0xf8; 74*5f757f3fSDimitry Andric __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); 7581ad6265SDimitry Andric #endif 7681ad6265SDimitry Andric /* Insert an artificial "read/write" reference to the variable 7781ad6265SDimitry Andric read below, to ensure the compiler does not schedule 7881ad6265SDimitry Andric a read/use of the variable before the FPSCR is modified, above. 7981ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 8081ad6265SDimitry Andric */ 8181ad6265SDimitry Andric __asm__("" : "+wa"(__A)); 8281ad6265SDimitry Andric } 8381ad6265SDimitry Andric 8481ad6265SDimitry Andric switch (__rounding) { 8581ad6265SDimitry Andric case _MM_FROUND_TO_NEAREST_INT: 86*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9 87*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffsl(); 88*5f757f3fSDimitry Andric #else 89*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 90*5f757f3fSDimitry Andric __fpscr_save.__fpscr &= 0x70007f0ffL; 91*5f757f3fSDimitry Andric #endif 9281ad6265SDimitry Andric __attribute__((fallthrough)); 9381ad6265SDimitry Andric case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: 94*5f757f3fSDimitry Andric __builtin_ppc_set_fpscr_rn(0b00); 9581ad6265SDimitry Andric /* Insert an artificial "read/write" reference to the variable 9681ad6265SDimitry Andric read below, to ensure the compiler does not schedule 9781ad6265SDimitry Andric a read/use of the variable before the FPSCR is modified, above. 9881ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 9981ad6265SDimitry Andric */ 10081ad6265SDimitry Andric __asm__("" : "+wa"(__A)); 10181ad6265SDimitry Andric 10281ad6265SDimitry Andric __r = vec_rint((__v2df)__A); 10381ad6265SDimitry Andric 10481ad6265SDimitry Andric /* Insert an artificial "read" reference to the variable written 10581ad6265SDimitry Andric above, to ensure the compiler does not schedule the computation 10681ad6265SDimitry Andric of the value after the manipulation of the FPSCR, below. 10781ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 10881ad6265SDimitry Andric */ 10981ad6265SDimitry Andric __asm__("" : : "wa"(__r)); 110*5f757f3fSDimitry Andric __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); 11181ad6265SDimitry Andric break; 11281ad6265SDimitry Andric case _MM_FROUND_TO_NEG_INF: 11381ad6265SDimitry Andric case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: 11481ad6265SDimitry Andric __r = vec_floor((__v2df)__A); 11581ad6265SDimitry Andric break; 11681ad6265SDimitry Andric case _MM_FROUND_TO_POS_INF: 11781ad6265SDimitry Andric case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: 11881ad6265SDimitry Andric __r = vec_ceil((__v2df)__A); 11981ad6265SDimitry Andric break; 12081ad6265SDimitry Andric case _MM_FROUND_TO_ZERO: 12181ad6265SDimitry Andric case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: 12281ad6265SDimitry Andric __r = vec_trunc((__v2df)__A); 12381ad6265SDimitry Andric break; 12481ad6265SDimitry Andric case _MM_FROUND_CUR_DIRECTION: 12581ad6265SDimitry Andric __r = vec_rint((__v2df)__A); 12681ad6265SDimitry Andric break; 12781ad6265SDimitry Andric } 12881ad6265SDimitry Andric if (__rounding & _MM_FROUND_NO_EXC) { 12981ad6265SDimitry Andric /* Insert an artificial "read" reference to the variable written 13081ad6265SDimitry Andric above, to ensure the compiler does not schedule the computation 13181ad6265SDimitry Andric of the value after the manipulation of the FPSCR, below. 13281ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 13381ad6265SDimitry Andric */ 13481ad6265SDimitry Andric __asm__("" : : "wa"(__r)); 13581ad6265SDimitry Andric /* Restore enabled exceptions. */ 136*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9 137*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffsl(); 138*5f757f3fSDimitry Andric #else 139*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 140*5f757f3fSDimitry Andric __fpscr_save.__fpscr &= 0x70007f0ffL; 141*5f757f3fSDimitry Andric #endif 14281ad6265SDimitry Andric __fpscr_save.__fpscr |= __enables_save.__fpscr; 143*5f757f3fSDimitry Andric __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); 14481ad6265SDimitry Andric } 14581ad6265SDimitry Andric return (__m128d)__r; 14681ad6265SDimitry Andric } 14781ad6265SDimitry Andric 14881ad6265SDimitry Andric extern __inline __m128d 14981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 15081ad6265SDimitry Andric _mm_round_sd(__m128d __A, __m128d __B, int __rounding) { 15181ad6265SDimitry Andric __B = _mm_round_pd(__B, __rounding); 15281ad6265SDimitry Andric __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]}; 15381ad6265SDimitry Andric return (__m128d)__r; 15481ad6265SDimitry Andric } 15581ad6265SDimitry Andric 15681ad6265SDimitry Andric extern __inline __m128 15781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 15881ad6265SDimitry Andric _mm_round_ps(__m128 __A, int __rounding) { 15981ad6265SDimitry Andric __v4sf __r; 16081ad6265SDimitry Andric union { 16181ad6265SDimitry Andric double __fr; 16281ad6265SDimitry Andric long long __fpscr; 16381ad6265SDimitry Andric } __enables_save, __fpscr_save; 16481ad6265SDimitry Andric 16581ad6265SDimitry Andric if (__rounding & _MM_FROUND_NO_EXC) { 16681ad6265SDimitry Andric /* Save enabled exceptions, disable all exceptions, 16781ad6265SDimitry Andric and preserve the rounding mode. */ 16881ad6265SDimitry Andric #ifdef _ARCH_PWR9 16981ad6265SDimitry Andric __asm__("mffsce %0" : "=f"(__fpscr_save.__fr)); 17081ad6265SDimitry Andric __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; 17181ad6265SDimitry Andric #else 172*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 17381ad6265SDimitry Andric __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; 17481ad6265SDimitry Andric __fpscr_save.__fpscr &= ~0xf8; 175*5f757f3fSDimitry Andric __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); 17681ad6265SDimitry Andric #endif 17781ad6265SDimitry Andric /* Insert an artificial "read/write" reference to the variable 17881ad6265SDimitry Andric read below, to ensure the compiler does not schedule 17981ad6265SDimitry Andric a read/use of the variable before the FPSCR is modified, above. 18081ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 18181ad6265SDimitry Andric */ 18281ad6265SDimitry Andric __asm__("" : "+wa"(__A)); 18381ad6265SDimitry Andric } 18481ad6265SDimitry Andric 18581ad6265SDimitry Andric switch (__rounding) { 18681ad6265SDimitry Andric case _MM_FROUND_TO_NEAREST_INT: 187*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9 188*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffsl(); 189*5f757f3fSDimitry Andric #else 190*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 191*5f757f3fSDimitry Andric __fpscr_save.__fpscr &= 0x70007f0ffL; 192*5f757f3fSDimitry Andric #endif 19381ad6265SDimitry Andric __attribute__((fallthrough)); 19481ad6265SDimitry Andric case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: 195*5f757f3fSDimitry Andric __builtin_ppc_set_fpscr_rn(0b00); 19681ad6265SDimitry Andric /* Insert an artificial "read/write" reference to the variable 19781ad6265SDimitry Andric read below, to ensure the compiler does not schedule 19881ad6265SDimitry Andric a read/use of the variable before the FPSCR is modified, above. 19981ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 20081ad6265SDimitry Andric */ 20181ad6265SDimitry Andric __asm__("" : "+wa"(__A)); 20281ad6265SDimitry Andric 20381ad6265SDimitry Andric __r = vec_rint((__v4sf)__A); 20481ad6265SDimitry Andric 20581ad6265SDimitry Andric /* Insert an artificial "read" reference to the variable written 20681ad6265SDimitry Andric above, to ensure the compiler does not schedule the computation 20781ad6265SDimitry Andric of the value after the manipulation of the FPSCR, below. 20881ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 20981ad6265SDimitry Andric */ 21081ad6265SDimitry Andric __asm__("" : : "wa"(__r)); 211*5f757f3fSDimitry Andric __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr); 21281ad6265SDimitry Andric break; 21381ad6265SDimitry Andric case _MM_FROUND_TO_NEG_INF: 21481ad6265SDimitry Andric case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: 21581ad6265SDimitry Andric __r = vec_floor((__v4sf)__A); 21681ad6265SDimitry Andric break; 21781ad6265SDimitry Andric case _MM_FROUND_TO_POS_INF: 21881ad6265SDimitry Andric case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: 21981ad6265SDimitry Andric __r = vec_ceil((__v4sf)__A); 22081ad6265SDimitry Andric break; 22181ad6265SDimitry Andric case _MM_FROUND_TO_ZERO: 22281ad6265SDimitry Andric case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: 22381ad6265SDimitry Andric __r = vec_trunc((__v4sf)__A); 22481ad6265SDimitry Andric break; 22581ad6265SDimitry Andric case _MM_FROUND_CUR_DIRECTION: 22681ad6265SDimitry Andric __r = vec_rint((__v4sf)__A); 22781ad6265SDimitry Andric break; 22881ad6265SDimitry Andric } 22981ad6265SDimitry Andric if (__rounding & _MM_FROUND_NO_EXC) { 23081ad6265SDimitry Andric /* Insert an artificial "read" reference to the variable written 23181ad6265SDimitry Andric above, to ensure the compiler does not schedule the computation 23281ad6265SDimitry Andric of the value after the manipulation of the FPSCR, below. 23381ad6265SDimitry Andric This can be removed if and when GCC PR102783 is fixed. 23481ad6265SDimitry Andric */ 23581ad6265SDimitry Andric __asm__("" : : "wa"(__r)); 23681ad6265SDimitry Andric /* Restore enabled exceptions. */ 237*5f757f3fSDimitry Andric #ifdef _ARCH_PWR9 238*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffsl(); 239*5f757f3fSDimitry Andric #else 240*5f757f3fSDimitry Andric __fpscr_save.__fr = __builtin_ppc_mffs(); 241*5f757f3fSDimitry Andric __fpscr_save.__fpscr &= 0x70007f0ffL; 242*5f757f3fSDimitry Andric #endif 24381ad6265SDimitry Andric __fpscr_save.__fpscr |= __enables_save.__fpscr; 244*5f757f3fSDimitry Andric __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr); 24581ad6265SDimitry Andric } 24681ad6265SDimitry Andric return (__m128)__r; 24781ad6265SDimitry Andric } 24881ad6265SDimitry Andric 24981ad6265SDimitry Andric extern __inline __m128 25081ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 25181ad6265SDimitry Andric _mm_round_ss(__m128 __A, __m128 __B, int __rounding) { 25281ad6265SDimitry Andric __B = _mm_round_ps(__B, __rounding); 25381ad6265SDimitry Andric __v4sf __r = (__v4sf)__A; 25481ad6265SDimitry Andric __r[0] = ((__v4sf)__B)[0]; 25581ad6265SDimitry Andric return (__m128)__r; 25681ad6265SDimitry Andric } 25781ad6265SDimitry Andric 25881ad6265SDimitry Andric #define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL) 25981ad6265SDimitry Andric #define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL) 26081ad6265SDimitry Andric 26181ad6265SDimitry Andric #define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) 26281ad6265SDimitry Andric #define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR) 26381ad6265SDimitry Andric 26481ad6265SDimitry Andric #define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL) 26581ad6265SDimitry Andric #define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL) 26681ad6265SDimitry Andric 26781ad6265SDimitry Andric #define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR) 26881ad6265SDimitry Andric #define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR) 26981ad6265SDimitry Andric 27081ad6265SDimitry Andric extern __inline __m128i 27181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 27281ad6265SDimitry Andric _mm_insert_epi8(__m128i const __A, int const __D, int const __N) { 27381ad6265SDimitry Andric __v16qi __result = (__v16qi)__A; 27481ad6265SDimitry Andric 27581ad6265SDimitry Andric __result[__N & 0xf] = __D; 27681ad6265SDimitry Andric 27781ad6265SDimitry Andric return (__m128i)__result; 27881ad6265SDimitry Andric } 27981ad6265SDimitry Andric 28081ad6265SDimitry Andric extern __inline __m128i 28181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 28281ad6265SDimitry Andric _mm_insert_epi32(__m128i const __A, int const __D, int const __N) { 28381ad6265SDimitry Andric __v4si __result = (__v4si)__A; 28481ad6265SDimitry Andric 28581ad6265SDimitry Andric __result[__N & 3] = __D; 28681ad6265SDimitry Andric 28781ad6265SDimitry Andric return (__m128i)__result; 28881ad6265SDimitry Andric } 28981ad6265SDimitry Andric 29081ad6265SDimitry Andric extern __inline __m128i 29181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 29281ad6265SDimitry Andric _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) { 29381ad6265SDimitry Andric __v2di __result = (__v2di)__A; 29481ad6265SDimitry Andric 29581ad6265SDimitry Andric __result[__N & 1] = __D; 29681ad6265SDimitry Andric 29781ad6265SDimitry Andric return (__m128i)__result; 29881ad6265SDimitry Andric } 29981ad6265SDimitry Andric 300a7dea167SDimitry Andric extern __inline int 301a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 302a7dea167SDimitry Andric _mm_extract_epi8(__m128i __X, const int __N) { 303a7dea167SDimitry Andric return (unsigned char)((__v16qi)__X)[__N & 15]; 304a7dea167SDimitry Andric } 305a7dea167SDimitry Andric 306a7dea167SDimitry Andric extern __inline int 307a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 308a7dea167SDimitry Andric _mm_extract_epi32(__m128i __X, const int __N) { 309a7dea167SDimitry Andric return ((__v4si)__X)[__N & 3]; 310a7dea167SDimitry Andric } 311a7dea167SDimitry Andric 312a7dea167SDimitry Andric extern __inline int 313a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 314a7dea167SDimitry Andric _mm_extract_epi64(__m128i __X, const int __N) { 315a7dea167SDimitry Andric return ((__v2di)__X)[__N & 1]; 316a7dea167SDimitry Andric } 317a7dea167SDimitry Andric 318a7dea167SDimitry Andric extern __inline int 319a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 320a7dea167SDimitry Andric _mm_extract_ps(__m128 __X, const int __N) { 321a7dea167SDimitry Andric return ((__v4si)__X)[__N & 3]; 322a7dea167SDimitry Andric } 323a7dea167SDimitry Andric 32481ad6265SDimitry Andric #ifdef _ARCH_PWR8 325a7dea167SDimitry Andric extern __inline __m128i 326a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 327a7dea167SDimitry Andric _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) { 32806c3fb27SDimitry Andric __v16qu __charmask = vec_splats((unsigned char)__imm8); 329a7dea167SDimitry Andric __charmask = vec_gb(__charmask); 33006c3fb27SDimitry Andric __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask); 331a7dea167SDimitry Andric #ifdef __BIG_ENDIAN__ 332a7dea167SDimitry Andric __shortmask = vec_reve(__shortmask); 333a7dea167SDimitry Andric #endif 334a7dea167SDimitry Andric return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask); 335a7dea167SDimitry Andric } 33681ad6265SDimitry Andric #endif 337a7dea167SDimitry Andric 338a7dea167SDimitry Andric extern __inline __m128i 339a7dea167SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 340a7dea167SDimitry Andric _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) { 34181ad6265SDimitry Andric #ifdef _ARCH_PWR10 34281ad6265SDimitry Andric return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask); 34381ad6265SDimitry Andric #else 344a7dea167SDimitry Andric const __v16qu __seven = vec_splats((unsigned char)0x07); 345a7dea167SDimitry Andric __v16qu __lmask = vec_sra((__v16qu)__mask, __seven); 34681ad6265SDimitry Andric return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask); 34781ad6265SDimitry Andric #endif 34881ad6265SDimitry Andric } 34981ad6265SDimitry Andric 35081ad6265SDimitry Andric extern __inline __m128 35181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 35281ad6265SDimitry Andric _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) { 35381ad6265SDimitry Andric __v16qu __pcv[] = { 35481ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 35581ad6265SDimitry Andric {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 35681ad6265SDimitry Andric {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, 35781ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, 35881ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15}, 35981ad6265SDimitry Andric {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15}, 36081ad6265SDimitry Andric {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15}, 36181ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15}, 36281ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31}, 36381ad6265SDimitry Andric {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31}, 36481ad6265SDimitry Andric {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31}, 36581ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31}, 36681ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, 36781ad6265SDimitry Andric {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, 36881ad6265SDimitry Andric {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 36981ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, 37081ad6265SDimitry Andric }; 37181ad6265SDimitry Andric __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]); 37281ad6265SDimitry Andric return (__m128)__r; 37381ad6265SDimitry Andric } 37481ad6265SDimitry Andric 37581ad6265SDimitry Andric extern __inline __m128 37681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 37781ad6265SDimitry Andric _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) { 37881ad6265SDimitry Andric #ifdef _ARCH_PWR10 37981ad6265SDimitry Andric return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask); 38081ad6265SDimitry Andric #else 38181ad6265SDimitry Andric const __v4si __zero = {0}; 38281ad6265SDimitry Andric const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero); 38381ad6265SDimitry Andric return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask); 38481ad6265SDimitry Andric #endif 38581ad6265SDimitry Andric } 38681ad6265SDimitry Andric 38781ad6265SDimitry Andric extern __inline __m128d 38881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 38981ad6265SDimitry Andric _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) { 39081ad6265SDimitry Andric __v16qu __pcv[] = { 39181ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 39281ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15}, 39381ad6265SDimitry Andric {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, 39481ad6265SDimitry Andric {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}}; 39581ad6265SDimitry Andric __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]); 39681ad6265SDimitry Andric return (__m128d)__r; 39781ad6265SDimitry Andric } 39881ad6265SDimitry Andric 39981ad6265SDimitry Andric #ifdef _ARCH_PWR8 40081ad6265SDimitry Andric extern __inline __m128d 40181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 40281ad6265SDimitry Andric _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) { 40381ad6265SDimitry Andric #ifdef _ARCH_PWR10 40481ad6265SDimitry Andric return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask); 40581ad6265SDimitry Andric #else 40681ad6265SDimitry Andric const __v2di __zero = {0}; 40781ad6265SDimitry Andric const __vector __bool long long __boolmask = 40881ad6265SDimitry Andric vec_cmplt((__v2di)__mask, __zero); 40981ad6265SDimitry Andric return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask); 41081ad6265SDimitry Andric #endif 41181ad6265SDimitry Andric } 41281ad6265SDimitry Andric #endif 41381ad6265SDimitry Andric 41481ad6265SDimitry Andric extern __inline int 41581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 41681ad6265SDimitry Andric _mm_testz_si128(__m128i __A, __m128i __B) { 41781ad6265SDimitry Andric /* Note: This implementation does NOT set "zero" or "carry" flags. */ 41881ad6265SDimitry Andric const __v16qu __zero = {0}; 41981ad6265SDimitry Andric return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero); 42081ad6265SDimitry Andric } 42181ad6265SDimitry Andric 42281ad6265SDimitry Andric extern __inline int 42381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 42481ad6265SDimitry Andric _mm_testc_si128(__m128i __A, __m128i __B) { 42581ad6265SDimitry Andric /* Note: This implementation does NOT set "zero" or "carry" flags. */ 42681ad6265SDimitry Andric const __v16qu __zero = {0}; 42781ad6265SDimitry Andric const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A); 42881ad6265SDimitry Andric return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero); 42981ad6265SDimitry Andric } 43081ad6265SDimitry Andric 43181ad6265SDimitry Andric extern __inline int 43281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 43381ad6265SDimitry Andric _mm_testnzc_si128(__m128i __A, __m128i __B) { 43481ad6265SDimitry Andric /* Note: This implementation does NOT set "zero" or "carry" flags. */ 43581ad6265SDimitry Andric return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0; 43681ad6265SDimitry Andric } 43781ad6265SDimitry Andric 43881ad6265SDimitry Andric #define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V)) 43981ad6265SDimitry Andric 44081ad6265SDimitry Andric #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) 44181ad6265SDimitry Andric 44281ad6265SDimitry Andric #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric #ifdef _ARCH_PWR8 44581ad6265SDimitry Andric extern __inline __m128i 44681ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 44781ad6265SDimitry Andric _mm_cmpeq_epi64(__m128i __X, __m128i __Y) { 44881ad6265SDimitry Andric return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y); 44981ad6265SDimitry Andric } 45081ad6265SDimitry Andric #endif 45181ad6265SDimitry Andric 45281ad6265SDimitry Andric extern __inline __m128i 45381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 45481ad6265SDimitry Andric _mm_min_epi8(__m128i __X, __m128i __Y) { 45581ad6265SDimitry Andric return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y); 456a7dea167SDimitry Andric } 457a7dea167SDimitry Andric 458e8d8bef9SDimitry Andric extern __inline __m128i 459e8d8bef9SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 46081ad6265SDimitry Andric _mm_min_epu16(__m128i __X, __m128i __Y) { 46181ad6265SDimitry Andric return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y); 462e8d8bef9SDimitry Andric } 463e8d8bef9SDimitry Andric 464e8d8bef9SDimitry Andric extern __inline __m128i 465e8d8bef9SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 46681ad6265SDimitry Andric _mm_min_epi32(__m128i __X, __m128i __Y) { 46781ad6265SDimitry Andric return (__m128i)vec_min((__v4si)__X, (__v4si)__Y); 468e8d8bef9SDimitry Andric } 469e8d8bef9SDimitry Andric 470e8d8bef9SDimitry Andric extern __inline __m128i 471e8d8bef9SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 47281ad6265SDimitry Andric _mm_min_epu32(__m128i __X, __m128i __Y) { 47381ad6265SDimitry Andric return (__m128i)vec_min((__v4su)__X, (__v4su)__Y); 474e8d8bef9SDimitry Andric } 475e8d8bef9SDimitry Andric 47681ad6265SDimitry Andric extern __inline __m128i 47781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 47881ad6265SDimitry Andric _mm_max_epi8(__m128i __X, __m128i __Y) { 47981ad6265SDimitry Andric return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y); 48081ad6265SDimitry Andric } 48181ad6265SDimitry Andric 48281ad6265SDimitry Andric extern __inline __m128i 48381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 48481ad6265SDimitry Andric _mm_max_epu16(__m128i __X, __m128i __Y) { 48581ad6265SDimitry Andric return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y); 48681ad6265SDimitry Andric } 48781ad6265SDimitry Andric 48881ad6265SDimitry Andric extern __inline __m128i 48981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 49081ad6265SDimitry Andric _mm_max_epi32(__m128i __X, __m128i __Y) { 49181ad6265SDimitry Andric return (__m128i)vec_max((__v4si)__X, (__v4si)__Y); 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric 49481ad6265SDimitry Andric extern __inline __m128i 49581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 49681ad6265SDimitry Andric _mm_max_epu32(__m128i __X, __m128i __Y) { 49781ad6265SDimitry Andric return (__m128i)vec_max((__v4su)__X, (__v4su)__Y); 49881ad6265SDimitry Andric } 49981ad6265SDimitry Andric 50081ad6265SDimitry Andric extern __inline __m128i 50181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 50281ad6265SDimitry Andric _mm_mullo_epi32(__m128i __X, __m128i __Y) { 50381ad6265SDimitry Andric return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y); 50481ad6265SDimitry Andric } 50581ad6265SDimitry Andric 50681ad6265SDimitry Andric #ifdef _ARCH_PWR8 50781ad6265SDimitry Andric extern __inline __m128i 50881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 50981ad6265SDimitry Andric _mm_mul_epi32(__m128i __X, __m128i __Y) { 51081ad6265SDimitry Andric return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y); 51181ad6265SDimitry Andric } 51281ad6265SDimitry Andric #endif 51381ad6265SDimitry Andric 51481ad6265SDimitry Andric extern __inline __m128i 51581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 51681ad6265SDimitry Andric _mm_cvtepi8_epi16(__m128i __A) { 51781ad6265SDimitry Andric return (__m128i)vec_unpackh((__v16qi)__A); 51881ad6265SDimitry Andric } 51981ad6265SDimitry Andric 52081ad6265SDimitry Andric extern __inline __m128i 52181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 52281ad6265SDimitry Andric _mm_cvtepi8_epi32(__m128i __A) { 52381ad6265SDimitry Andric __A = (__m128i)vec_unpackh((__v16qi)__A); 52481ad6265SDimitry Andric return (__m128i)vec_unpackh((__v8hi)__A); 52581ad6265SDimitry Andric } 52681ad6265SDimitry Andric 52781ad6265SDimitry Andric #ifdef _ARCH_PWR8 52881ad6265SDimitry Andric extern __inline __m128i 52981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 53081ad6265SDimitry Andric _mm_cvtepi8_epi64(__m128i __A) { 53181ad6265SDimitry Andric __A = (__m128i)vec_unpackh((__v16qi)__A); 53281ad6265SDimitry Andric __A = (__m128i)vec_unpackh((__v8hi)__A); 53381ad6265SDimitry Andric return (__m128i)vec_unpackh((__v4si)__A); 53481ad6265SDimitry Andric } 53581ad6265SDimitry Andric #endif 53681ad6265SDimitry Andric 53781ad6265SDimitry Andric extern __inline __m128i 53881ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 53981ad6265SDimitry Andric _mm_cvtepi16_epi32(__m128i __A) { 54081ad6265SDimitry Andric return (__m128i)vec_unpackh((__v8hi)__A); 54181ad6265SDimitry Andric } 54281ad6265SDimitry Andric 54381ad6265SDimitry Andric #ifdef _ARCH_PWR8 54481ad6265SDimitry Andric extern __inline __m128i 54581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 54681ad6265SDimitry Andric _mm_cvtepi16_epi64(__m128i __A) { 54781ad6265SDimitry Andric __A = (__m128i)vec_unpackh((__v8hi)__A); 54881ad6265SDimitry Andric return (__m128i)vec_unpackh((__v4si)__A); 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric #endif 55181ad6265SDimitry Andric 55281ad6265SDimitry Andric #ifdef _ARCH_PWR8 55381ad6265SDimitry Andric extern __inline __m128i 55481ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 55581ad6265SDimitry Andric _mm_cvtepi32_epi64(__m128i __A) { 55681ad6265SDimitry Andric return (__m128i)vec_unpackh((__v4si)__A); 55781ad6265SDimitry Andric } 55881ad6265SDimitry Andric #endif 55981ad6265SDimitry Andric 56081ad6265SDimitry Andric extern __inline __m128i 56181ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 56281ad6265SDimitry Andric _mm_cvtepu8_epi16(__m128i __A) { 56381ad6265SDimitry Andric const __v16qu __zero = {0}; 56481ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 56581ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); 56681ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 56781ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); 56881ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 56981ad6265SDimitry Andric return __A; 57081ad6265SDimitry Andric } 57181ad6265SDimitry Andric 57281ad6265SDimitry Andric extern __inline __m128i 57381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 57481ad6265SDimitry Andric _mm_cvtepu8_epi32(__m128i __A) { 57581ad6265SDimitry Andric const __v16qu __zero = {0}; 57681ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 57781ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); 57881ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero); 57981ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 58081ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); 58181ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A); 58281ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 58381ad6265SDimitry Andric return __A; 58481ad6265SDimitry Andric } 58581ad6265SDimitry Andric 58681ad6265SDimitry Andric extern __inline __m128i 58781ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 58881ad6265SDimitry Andric _mm_cvtepu8_epi64(__m128i __A) { 58981ad6265SDimitry Andric const __v16qu __zero = {0}; 59081ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 59181ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v16qu)__A, __zero); 59281ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero); 59381ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero); 59481ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 59581ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A); 59681ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A); 59781ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A); 59881ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 59981ad6265SDimitry Andric return __A; 60081ad6265SDimitry Andric } 60181ad6265SDimitry Andric 60281ad6265SDimitry Andric extern __inline __m128i 60381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 60481ad6265SDimitry Andric _mm_cvtepu16_epi32(__m128i __A) { 60581ad6265SDimitry Andric const __v8hu __zero = {0}; 60681ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 60781ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__A, __zero); 60881ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 60981ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A); 61081ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 61181ad6265SDimitry Andric return __A; 61281ad6265SDimitry Andric } 61381ad6265SDimitry Andric 61481ad6265SDimitry Andric extern __inline __m128i 61581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 61681ad6265SDimitry Andric _mm_cvtepu16_epi64(__m128i __A) { 61781ad6265SDimitry Andric const __v8hu __zero = {0}; 61881ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 61981ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v8hu)__A, __zero); 62081ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero); 62181ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 62281ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A); 62381ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A); 62481ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 62581ad6265SDimitry Andric return __A; 62681ad6265SDimitry Andric } 62781ad6265SDimitry Andric 62881ad6265SDimitry Andric extern __inline __m128i 62981ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 63081ad6265SDimitry Andric _mm_cvtepu32_epi64(__m128i __A) { 63181ad6265SDimitry Andric const __v4su __zero = {0}; 63281ad6265SDimitry Andric #ifdef __LITTLE_ENDIAN__ 63381ad6265SDimitry Andric __A = (__m128i)vec_mergeh((__v4su)__A, __zero); 63481ad6265SDimitry Andric #else /* __BIG_ENDIAN__. */ 63581ad6265SDimitry Andric __A = (__m128i)vec_mergeh(__zero, (__v4su)__A); 63681ad6265SDimitry Andric #endif /* __BIG_ENDIAN__. */ 63781ad6265SDimitry Andric return __A; 63881ad6265SDimitry Andric } 63981ad6265SDimitry Andric 64081ad6265SDimitry Andric /* Return horizontal packed word minimum and its index in bits [15:0] 64181ad6265SDimitry Andric and bits [18:16] respectively. */ 64281ad6265SDimitry Andric extern __inline __m128i 64381ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 64481ad6265SDimitry Andric _mm_minpos_epu16(__m128i __A) { 64581ad6265SDimitry Andric union __u { 64681ad6265SDimitry Andric __m128i __m; 64781ad6265SDimitry Andric __v8hu __uh; 64881ad6265SDimitry Andric }; 64981ad6265SDimitry Andric union __u __u = {.__m = __A}, __r = {.__m = {0}}; 65081ad6265SDimitry Andric unsigned short __ridx = 0; 65181ad6265SDimitry Andric unsigned short __rmin = __u.__uh[__ridx]; 65281ad6265SDimitry Andric unsigned long __i; 65381ad6265SDimitry Andric for (__i = 1; __i < 8; __i++) { 65481ad6265SDimitry Andric if (__u.__uh[__i] < __rmin) { 65581ad6265SDimitry Andric __rmin = __u.__uh[__i]; 65681ad6265SDimitry Andric __ridx = __i; 65781ad6265SDimitry Andric } 65881ad6265SDimitry Andric } 65981ad6265SDimitry Andric __r.__uh[0] = __rmin; 66081ad6265SDimitry Andric __r.__uh[1] = __ridx; 66181ad6265SDimitry Andric return __r.__m; 66281ad6265SDimitry Andric } 66381ad6265SDimitry Andric 66481ad6265SDimitry Andric extern __inline __m128i 66581ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 66681ad6265SDimitry Andric _mm_packus_epi32(__m128i __X, __m128i __Y) { 66781ad6265SDimitry Andric return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y); 66881ad6265SDimitry Andric } 66981ad6265SDimitry Andric 67081ad6265SDimitry Andric #ifdef _ARCH_PWR8 67181ad6265SDimitry Andric extern __inline __m128i 67281ad6265SDimitry Andric __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 67381ad6265SDimitry Andric _mm_cmpgt_epi64(__m128i __X, __m128i __Y) { 67481ad6265SDimitry Andric return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y); 67581ad6265SDimitry Andric } 67681ad6265SDimitry Andric #endif 67781ad6265SDimitry Andric 678a7dea167SDimitry Andric #else 679a7dea167SDimitry Andric #include_next <smmintrin.h> 680bdd1243dSDimitry Andric #endif /* defined(__powerpc64__) && \ 681fcaf7f86SDimitry Andric * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ 682a7dea167SDimitry Andric 68381ad6265SDimitry Andric #endif /* SMMINTRIN_H_ */ 684