/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>

/* Absolute value of each signed 16-bit element of __A (vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  return (__m128i)vec_abs((__v8hi)__A);
}

/* Absolute value of each signed 32-bit element of __A (vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  return (__m128i)vec_abs((__v4si)__A);
}

/* Absolute value of each signed 8-bit element of __A (vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  return (__m128i)vec_abs((__v16qi)__A);
}

/* 64-bit variant of _mm_abs_epi16: __A is replicated into both doublewords
   of a 128-bit vector, vec_abs is applied, and doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  __v8hi __B = (__v8hi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

/* 64-bit variant of _mm_abs_epi32; same replicate/compute/extract scheme.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  __v4si __B = (__v4si)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

/* 64-bit variant of _mm_abs_epi8; same replicate/compute/extract scheme.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  __v16qi __B = (__v16qi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

/* Byte-wise align of the pair __A:__B by __count bytes.

   - Compile-time constant __count below 16: a single vec_sld, with the
     operands (and the result) byte-reversed on little-endian targets.
   - __count == 0: __B is returned unchanged.
   - 16 <= __count < 32: only __A contributes; it is shifted by
     (__count - 16) octets.
   - __count >= 32: the result is all zeros.
   - Otherwise (0 < __count < 16, not a constant): __A and __B are shifted
     in opposite directions by complementary octet counts and OR-ed.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      /* The shift amount is expressed in bits and splatted to every byte.  */
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}

/* 64-bit align: __B and __A are placed in one 128-bit vector, which is
   shifted by __count octets; doubleword 0 is returned.  A __count of 16 or
   more yields zero.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}

/* Horizontal add of adjacent 16-bit pairs.  __P gathers the even-numbered
   halfwords of __A then __B, __Q the odd-numbered ones; the two gathered
   vectors are added element-wise.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

/* Horizontal add of adjacent 32-bit pairs.  __P gathers the even-numbered
   words of __A then __B, __Q the odd-numbered ones.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

/* 64-bit horizontal add of 16-bit pairs.  __A and __B share one vector; the
   permute controls repeat the same selection in both halves, and
   doubleword 1 of the sum is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* 64-bit horizontal add of 32-bit pairs; same scheme as _mm_hadd_pi16.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* Saturating horizontal add of 16-bit pairs: vec_sum4s accumulates each
   halfword pair into a 32-bit sum (starting from zero), and vec_packs
   narrows the sums of __A and __B back to 16 bits with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}

/* 64-bit saturating horizontal add: __A and __B share one vector, the pair
   sums are packed with saturation, and doubleword 1 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}

/* Horizontal subtract of adjacent 16-bit pairs: even-numbered halfwords
   (gathered by __P) minus odd-numbered halfwords (gathered by __Q).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

/* Horizontal subtract of adjacent 32-bit pairs: even-numbered words minus
   odd-numbered words.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

/* 64-bit horizontal subtract of 16-bit pairs; doubleword 1 of the
   difference is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* 64-bit horizontal subtract of 32-bit pairs; doubleword 1 of the
   difference is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

/* Saturating horizontal subtract of adjacent 16-bit pairs (vec_subs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

/* 64-bit saturating horizontal subtract of 16-bit pairs; doubleword 1 of
   the difference is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}

/* Byte shuffle of __A controlled by __B.  vec_perm selects bytes of __A
   using __B as the control vector; every lane whose control byte is
   negative (as a signed char) is then replaced by zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}

/* 64-bit byte shuffle: __A and __B are each replicated into both
   doublewords, shuffled as in _mm_shuffle_epi8, and doubleword 0 is
   returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}

#ifdef _ARCH_PWR8
/* Applies the sign of each byte of __B to the matching byte of __A.
   __selectneg is -1 where __B < 0, __selectpos is +1 where __B > 0, so
   their sum is a per-lane multiplier of -1, 0 or +1.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* 16-bit counterpart of _mm_sign_epi8: multiplies each halfword of __A by
   -1, 0 or +1 according to the sign of the matching halfword of __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* 32-bit counterpart of _mm_sign_epi8: multiplies each word of __A by
   -1, 0 or +1 according to the sign of the matching word of __B.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
/* 64-bit variant: replicates __A and __B into 128-bit vectors, delegates
   to _mm_sign_epi8, and returns doubleword 0.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
/* 64-bit variant: replicates __A and __B into 128-bit vectors, delegates
   to _mm_sign_epi16, and returns doubleword 0.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
/* 64-bit variant: replicates __A and __B into 128-bit vectors, delegates
   to _mm_sign_epi32, and returns doubleword 0.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

/* Multiply-add of unsigned bytes of __A with signed bytes of __B.
   Both inputs are widened to 16 bits; the widened __A halves are masked
   with 0x00ff so they are treated as unsigned.  After the element-wise
   multiply, adjacent products are gathered with the two permute controls
   and added with saturation (vec_adds).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  return (__m128i)vec_adds(__E, __F);
}

/* 64-bit variant of _mm_maddubs_epi16: the replicated inputs are widened
   with vec_unpackl, multiplied, pair-summed with saturation, and
   doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  __D = vec_mul(__C, __D);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}

/* Rounded high multiply of 16-bit elements.  Each half of the inputs is
   widened to 32 bits and multiplied; every product is shifted right by 14,
   incremented by one, shifted right by one more bit, and the two halves
   are packed back to 16-bit elements with vec_pack.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  return (__m128i)vec_pack(__C, __D);
}

/* 64-bit variant of _mm_mulhrs_epi16: only the vec_unpackh half of the
   replicated inputs is processed, and doubleword 0 of the packed result is
   returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  __C = vec_mul(__C, __D);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  /* NOTE(review): __D still holds the widened __B, not a product.  This is
     presumably harmless because only doubleword 0 of the pack is returned;
     confirm against the vec_pack element ordering.  */
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* TMMINTRIN_H_ */