xref: /freebsd/contrib/llvm-project/clang/lib/Headers/ppc_wrappers/tmmintrin.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9a7dea167SDimitry Andric 
/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */
12a7dea167SDimitry Andric 
13a7dea167SDimitry Andric #ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
23a7dea167SDimitry Andric #endif
24a7dea167SDimitry Andric 
25a7dea167SDimitry Andric #ifndef TMMINTRIN_H_
26a7dea167SDimitry Andric #define TMMINTRIN_H_
27a7dea167SDimitry Andric 
28*bdd1243dSDimitry Andric #if defined(__powerpc64__) &&                                                  \
29fcaf7f86SDimitry Andric     (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
30a7dea167SDimitry Andric 
31a7dea167SDimitry Andric #include <altivec.h>
32a7dea167SDimitry Andric 
/* We need definitions from the SSE header files.  */
34a7dea167SDimitry Andric #include <pmmintrin.h>
35a7dea167SDimitry Andric 
36a7dea167SDimitry Andric extern __inline __m128i
37a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16(__m128i __A)3881ad6265SDimitry Andric     _mm_abs_epi16(__m128i __A) {
39a7dea167SDimitry Andric   return (__m128i)vec_abs((__v8hi)__A);
40a7dea167SDimitry Andric }
41a7dea167SDimitry Andric 
42a7dea167SDimitry Andric extern __inline __m128i
43a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32(__m128i __A)4481ad6265SDimitry Andric     _mm_abs_epi32(__m128i __A) {
45a7dea167SDimitry Andric   return (__m128i)vec_abs((__v4si)__A);
46a7dea167SDimitry Andric }
47a7dea167SDimitry Andric 
48a7dea167SDimitry Andric extern __inline __m128i
49a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8(__m128i __A)5081ad6265SDimitry Andric     _mm_abs_epi8(__m128i __A) {
51a7dea167SDimitry Andric   return (__m128i)vec_abs((__v16qi)__A);
52a7dea167SDimitry Andric }
53a7dea167SDimitry Andric 
54a7dea167SDimitry Andric extern __inline __m64
55a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16(__m64 __A)5681ad6265SDimitry Andric     _mm_abs_pi16(__m64 __A) {
57a7dea167SDimitry Andric   __v8hi __B = (__v8hi)(__v2du){__A, __A};
58a7dea167SDimitry Andric   return (__m64)((__v2du)vec_abs(__B))[0];
59a7dea167SDimitry Andric }
60a7dea167SDimitry Andric 
61a7dea167SDimitry Andric extern __inline __m64
62a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32(__m64 __A)6381ad6265SDimitry Andric     _mm_abs_pi32(__m64 __A) {
64a7dea167SDimitry Andric   __v4si __B = (__v4si)(__v2du){__A, __A};
65a7dea167SDimitry Andric   return (__m64)((__v2du)vec_abs(__B))[0];
66a7dea167SDimitry Andric }
67a7dea167SDimitry Andric 
68a7dea167SDimitry Andric extern __inline __m64
69a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8(__m64 __A)7081ad6265SDimitry Andric     _mm_abs_pi8(__m64 __A) {
71a7dea167SDimitry Andric   __v16qi __B = (__v16qi)(__v2du){__A, __A};
72a7dea167SDimitry Andric   return (__m64)((__v2du)vec_abs(__B))[0];
73a7dea167SDimitry Andric }
74a7dea167SDimitry Andric 
75a7dea167SDimitry Andric extern __inline __m128i
76a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __A,__m128i __B,const unsigned int __count)7781ad6265SDimitry Andric     _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
7881ad6265SDimitry Andric   if (__builtin_constant_p(__count) && __count < 16) {
79a7dea167SDimitry Andric #ifdef __LITTLE_ENDIAN__
80a7dea167SDimitry Andric     __A = (__m128i)vec_reve((__v16qu)__A);
81a7dea167SDimitry Andric     __B = (__m128i)vec_reve((__v16qu)__B);
82a7dea167SDimitry Andric #endif
83a7dea167SDimitry Andric     __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
84a7dea167SDimitry Andric #ifdef __LITTLE_ENDIAN__
85a7dea167SDimitry Andric     __A = (__m128i)vec_reve((__v16qu)__A);
86a7dea167SDimitry Andric #endif
87a7dea167SDimitry Andric     return __A;
88a7dea167SDimitry Andric   }
89a7dea167SDimitry Andric 
90a7dea167SDimitry Andric   if (__count == 0)
91a7dea167SDimitry Andric     return __B;
92a7dea167SDimitry Andric 
9381ad6265SDimitry Andric   if (__count >= 16) {
9481ad6265SDimitry Andric     if (__count >= 32) {
9581ad6265SDimitry Andric       const __v16qu __zero = {0};
9681ad6265SDimitry Andric       return (__m128i)__zero;
9781ad6265SDimitry Andric     } else {
9881ad6265SDimitry Andric       const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
99a7dea167SDimitry Andric #ifdef __LITTLE_ENDIAN__
100a7dea167SDimitry Andric       return (__m128i)vec_sro((__v16qu)__A, __shift);
101a7dea167SDimitry Andric #else
102a7dea167SDimitry Andric       return (__m128i)vec_slo((__v16qu)__A, __shift);
103a7dea167SDimitry Andric #endif
104a7dea167SDimitry Andric     }
10581ad6265SDimitry Andric   } else {
10681ad6265SDimitry Andric     const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
107a7dea167SDimitry Andric     const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
108a7dea167SDimitry Andric #ifdef __LITTLE_ENDIAN__
109a7dea167SDimitry Andric     __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
110a7dea167SDimitry Andric     __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
111a7dea167SDimitry Andric #else
112a7dea167SDimitry Andric     __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
113a7dea167SDimitry Andric     __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
114a7dea167SDimitry Andric #endif
115a7dea167SDimitry Andric     return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
116a7dea167SDimitry Andric   }
117a7dea167SDimitry Andric }
118a7dea167SDimitry Andric 
119a7dea167SDimitry Andric extern __inline __m64
120a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __A,__m64 __B,unsigned int __count)12181ad6265SDimitry Andric     _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
12281ad6265SDimitry Andric   if (__count < 16) {
123a7dea167SDimitry Andric     __v2du __C = {__B, __A};
124a7dea167SDimitry Andric #ifdef __LITTLE_ENDIAN__
125a7dea167SDimitry Andric     const __v4su __shift = {__count << 3, 0, 0, 0};
126a7dea167SDimitry Andric     __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
127a7dea167SDimitry Andric #else
128a7dea167SDimitry Andric     const __v4su __shift = {0, 0, 0, __count << 3};
129a7dea167SDimitry Andric     __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
130a7dea167SDimitry Andric #endif
131a7dea167SDimitry Andric     return (__m64)__C[0];
13281ad6265SDimitry Andric   } else {
133a7dea167SDimitry Andric     const __m64 __zero = {0};
134a7dea167SDimitry Andric     return __zero;
135a7dea167SDimitry Andric   }
136a7dea167SDimitry Andric }
137a7dea167SDimitry Andric 
138a7dea167SDimitry Andric extern __inline __m128i
139a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16(__m128i __A,__m128i __B)14081ad6265SDimitry Andric     _mm_hadd_epi16(__m128i __A, __m128i __B) {
14181ad6265SDimitry Andric   const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
14281ad6265SDimitry Andric                        16, 17, 20, 21, 24, 25, 28, 29};
14381ad6265SDimitry Andric   const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
14481ad6265SDimitry Andric                        18, 19, 22, 23, 26, 27, 30, 31};
145a7dea167SDimitry Andric   __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
146a7dea167SDimitry Andric   __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
147a7dea167SDimitry Andric   return (__m128i)vec_add(__C, __D);
148a7dea167SDimitry Andric }
149a7dea167SDimitry Andric 
150a7dea167SDimitry Andric extern __inline __m128i
151a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32(__m128i __A,__m128i __B)15281ad6265SDimitry Andric     _mm_hadd_epi32(__m128i __A, __m128i __B) {
15381ad6265SDimitry Andric   const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
15481ad6265SDimitry Andric                        16, 17, 18, 19, 24, 25, 26, 27};
15581ad6265SDimitry Andric   const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
15681ad6265SDimitry Andric                        20, 21, 22, 23, 28, 29, 30, 31};
157a7dea167SDimitry Andric   __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
158a7dea167SDimitry Andric   __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
159a7dea167SDimitry Andric   return (__m128i)vec_add(__C, __D);
160a7dea167SDimitry Andric }
161a7dea167SDimitry Andric 
162a7dea167SDimitry Andric extern __inline __m64
163a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16(__m64 __A,__m64 __B)16481ad6265SDimitry Andric     _mm_hadd_pi16(__m64 __A, __m64 __B) {
165a7dea167SDimitry Andric   __v8hi __C = (__v8hi)(__v2du){__A, __B};
16681ad6265SDimitry Andric   const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
16781ad6265SDimitry Andric   const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
168a7dea167SDimitry Andric   __v8hi __D = vec_perm(__C, __C, __Q);
169a7dea167SDimitry Andric   __C = vec_perm(__C, __C, __P);
170a7dea167SDimitry Andric   __C = vec_add(__C, __D);
171a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
172a7dea167SDimitry Andric }
173a7dea167SDimitry Andric 
174a7dea167SDimitry Andric extern __inline __m64
175a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32(__m64 __A,__m64 __B)17681ad6265SDimitry Andric     _mm_hadd_pi32(__m64 __A, __m64 __B) {
177a7dea167SDimitry Andric   __v4si __C = (__v4si)(__v2du){__A, __B};
17881ad6265SDimitry Andric   const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
17981ad6265SDimitry Andric   const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
180a7dea167SDimitry Andric   __v4si __D = vec_perm(__C, __C, __Q);
181a7dea167SDimitry Andric   __C = vec_perm(__C, __C, __P);
182a7dea167SDimitry Andric   __C = vec_add(__C, __D);
183a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
184a7dea167SDimitry Andric }
185a7dea167SDimitry Andric 
186a7dea167SDimitry Andric extern __inline __m128i
187a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16(__m128i __A,__m128i __B)18881ad6265SDimitry Andric     _mm_hadds_epi16(__m128i __A, __m128i __B) {
189a7dea167SDimitry Andric   __v4si __C = {0}, __D = {0};
190a7dea167SDimitry Andric   __C = vec_sum4s((__v8hi)__A, __C);
191a7dea167SDimitry Andric   __D = vec_sum4s((__v8hi)__B, __D);
192a7dea167SDimitry Andric   __C = (__v4si)vec_packs(__C, __D);
193a7dea167SDimitry Andric   return (__m128i)__C;
194a7dea167SDimitry Andric }
195a7dea167SDimitry Andric 
196a7dea167SDimitry Andric extern __inline __m64
197a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16(__m64 __A,__m64 __B)19881ad6265SDimitry Andric     _mm_hadds_pi16(__m64 __A, __m64 __B) {
199a7dea167SDimitry Andric   const __v4si __zero = {0};
200a7dea167SDimitry Andric   __v8hi __C = (__v8hi)(__v2du){__A, __B};
201a7dea167SDimitry Andric   __v4si __D = vec_sum4s(__C, __zero);
202a7dea167SDimitry Andric   __C = vec_packs(__D, __D);
203a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
204a7dea167SDimitry Andric }
205a7dea167SDimitry Andric 
206a7dea167SDimitry Andric extern __inline __m128i
207a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16(__m128i __A,__m128i __B)20881ad6265SDimitry Andric     _mm_hsub_epi16(__m128i __A, __m128i __B) {
20981ad6265SDimitry Andric   const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
21081ad6265SDimitry Andric                        16, 17, 20, 21, 24, 25, 28, 29};
21181ad6265SDimitry Andric   const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
21281ad6265SDimitry Andric                        18, 19, 22, 23, 26, 27, 30, 31};
213a7dea167SDimitry Andric   __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
214a7dea167SDimitry Andric   __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
215a7dea167SDimitry Andric   return (__m128i)vec_sub(__C, __D);
216a7dea167SDimitry Andric }
217a7dea167SDimitry Andric 
218a7dea167SDimitry Andric extern __inline __m128i
219a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32(__m128i __A,__m128i __B)22081ad6265SDimitry Andric     _mm_hsub_epi32(__m128i __A, __m128i __B) {
22181ad6265SDimitry Andric   const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
22281ad6265SDimitry Andric                        16, 17, 18, 19, 24, 25, 26, 27};
22381ad6265SDimitry Andric   const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
22481ad6265SDimitry Andric                        20, 21, 22, 23, 28, 29, 30, 31};
225a7dea167SDimitry Andric   __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
226a7dea167SDimitry Andric   __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
227a7dea167SDimitry Andric   return (__m128i)vec_sub(__C, __D);
228a7dea167SDimitry Andric }
229a7dea167SDimitry Andric 
230a7dea167SDimitry Andric extern __inline __m64
231a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16(__m64 __A,__m64 __B)23281ad6265SDimitry Andric     _mm_hsub_pi16(__m64 __A, __m64 __B) {
23381ad6265SDimitry Andric   const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
23481ad6265SDimitry Andric   const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
235a7dea167SDimitry Andric   __v8hi __C = (__v8hi)(__v2du){__A, __B};
236a7dea167SDimitry Andric   __v8hi __D = vec_perm(__C, __C, __Q);
237a7dea167SDimitry Andric   __C = vec_perm(__C, __C, __P);
238a7dea167SDimitry Andric   __C = vec_sub(__C, __D);
239a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
240a7dea167SDimitry Andric }
241a7dea167SDimitry Andric 
242a7dea167SDimitry Andric extern __inline __m64
243a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32(__m64 __A,__m64 __B)24481ad6265SDimitry Andric     _mm_hsub_pi32(__m64 __A, __m64 __B) {
24581ad6265SDimitry Andric   const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
24681ad6265SDimitry Andric   const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
247a7dea167SDimitry Andric   __v4si __C = (__v4si)(__v2du){__A, __B};
248a7dea167SDimitry Andric   __v4si __D = vec_perm(__C, __C, __Q);
249a7dea167SDimitry Andric   __C = vec_perm(__C, __C, __P);
250a7dea167SDimitry Andric   __C = vec_sub(__C, __D);
251a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
252a7dea167SDimitry Andric }
253a7dea167SDimitry Andric 
254a7dea167SDimitry Andric extern __inline __m128i
255a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16(__m128i __A,__m128i __B)25681ad6265SDimitry Andric     _mm_hsubs_epi16(__m128i __A, __m128i __B) {
25781ad6265SDimitry Andric   const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
25881ad6265SDimitry Andric                        16, 17, 20, 21, 24, 25, 28, 29};
25981ad6265SDimitry Andric   const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
26081ad6265SDimitry Andric                        18, 19, 22, 23, 26, 27, 30, 31};
261a7dea167SDimitry Andric   __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
262a7dea167SDimitry Andric   __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
263a7dea167SDimitry Andric   return (__m128i)vec_subs(__C, __D);
264a7dea167SDimitry Andric }
265a7dea167SDimitry Andric 
266a7dea167SDimitry Andric extern __inline __m64
267a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16(__m64 __A,__m64 __B)26881ad6265SDimitry Andric     _mm_hsubs_pi16(__m64 __A, __m64 __B) {
26981ad6265SDimitry Andric   const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
27081ad6265SDimitry Andric   const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
271a7dea167SDimitry Andric   __v8hi __C = (__v8hi)(__v2du){__A, __B};
272a7dea167SDimitry Andric   __v8hi __D = vec_perm(__C, __C, __P);
273a7dea167SDimitry Andric   __v8hi __E = vec_perm(__C, __C, __Q);
274a7dea167SDimitry Andric   __C = vec_subs(__D, __E);
275a7dea167SDimitry Andric   return (__m64)((__v2du)__C)[1];
276a7dea167SDimitry Andric }
277a7dea167SDimitry Andric 
278a7dea167SDimitry Andric extern __inline __m128i
279a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8(__m128i __A,__m128i __B)28081ad6265SDimitry Andric     _mm_shuffle_epi8(__m128i __A, __m128i __B) {
281a7dea167SDimitry Andric   const __v16qi __zero = {0};
282a7dea167SDimitry Andric   __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
283a7dea167SDimitry Andric   __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
284a7dea167SDimitry Andric   return (__m128i)vec_sel(__C, __zero, __select);
285a7dea167SDimitry Andric }
286a7dea167SDimitry Andric 
287a7dea167SDimitry Andric extern __inline __m64
288a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8(__m64 __A,__m64 __B)28981ad6265SDimitry Andric     _mm_shuffle_pi8(__m64 __A, __m64 __B) {
290a7dea167SDimitry Andric   const __v16qi __zero = {0};
291a7dea167SDimitry Andric   __v16qi __C = (__v16qi)(__v2du){__A, __A};
292a7dea167SDimitry Andric   __v16qi __D = (__v16qi)(__v2du){__B, __B};
293a7dea167SDimitry Andric   __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
294a7dea167SDimitry Andric   __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
295a7dea167SDimitry Andric   __C = vec_sel(__C, __zero, __select);
296a7dea167SDimitry Andric   return (__m64)((__v2du)(__C))[0];
297a7dea167SDimitry Andric }
298a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* Applies the sign of each 8-bit element of __B to the matching element of
   __A by multiplying __A with a per-element factor built from two compares
   of __B against zero: the "less than" mask plus the negated "greater than"
   mask.
   NOTE(review): this relies on the AltiVec compares producing all-ones
   (-1) for true and 0 for false, which makes the factor -1, 0 or +1.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  const __v16qi __ctl = (__v16qi)__B;
  const __v16qi __is_neg = (__v16qi)vec_cmplt(__ctl, __zero);
  const __v16qi __is_pos = (__v16qi)vec_neg((__v16qi)vec_cmpgt(__ctl, __zero));
  const __v16qi __factor = vec_add(__is_neg, __is_pos);
  return (__m128i)vec_mul((__v16qi)__A, __factor);
}
#endif
311a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* Applies the sign of each 16-bit element of __B to the matching element
   of __A by multiplying __A with a per-element factor built from two
   compares of __B against zero: the "less than" mask plus the negated
   "greater than" mask.
   NOTE(review): this relies on the AltiVec compares producing all-ones
   (-1) for true and 0 for false, which makes the factor -1, 0 or +1.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  const __v8hi __ctl = (__v8hi)__B;
  const __v8hi __is_neg = (__v8hi)vec_cmplt(__ctl, __zero);
  const __v8hi __is_pos = (__v8hi)vec_neg((__v8hi)vec_cmpgt(__ctl, __zero));
  const __v8hi __factor = vec_add(__is_neg, __is_pos);
  return (__m128i)vec_mul((__v8hi)__A, __factor);
}
#endif
323a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* Applies the sign of each 32-bit element of __B to the matching element
   of __A by multiplying __A with a per-element factor built from two
   compares of __B against zero: the "less than" mask plus the negated
   "greater than" mask.
   NOTE(review): this relies on the AltiVec compares producing all-ones
   (-1) for true and 0 for false, which makes the factor -1, 0 or +1.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  const __v4si __ctl = (__v4si)__B;
  const __v4si __is_neg = (__v4si)vec_cmplt(__ctl, __zero);
  const __v4si __is_pos = (__v4si)vec_neg((__v4si)vec_cmpgt(__ctl, __zero));
  const __v4si __factor = vec_add(__is_neg, __is_pos);
  return (__m128i)vec_mul((__v4si)__A, __factor);
}
#endif
335a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* 64-bit form of _mm_sign_epi8.  __A and __B are each replicated into both
   doublewords of a 128-bit vector, the work is delegated to
   _mm_sign_epi8, and doubleword 0 of its result is returned.
   (The previously declared but unused __zero local has been dropped.)  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif
347a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* 64-bit form of _mm_sign_epi16.  __A and __B are each replicated into
   both doublewords of a 128-bit vector, the work is delegated to
   _mm_sign_epi16, and doubleword 0 of its result is returned.
   (The previously declared but unused __zero local has been dropped.)  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif
359a7dea167SDimitry Andric 
#ifdef _ARCH_PWR8
/* 64-bit form of _mm_sign_epi32.  __A and __B are each replicated into
   both doublewords of a 128-bit vector, the work is delegated to
   _mm_sign_epi32, and doubleword 0 of its result is returned.
   (The previously declared but unused __zero local has been dropped.)  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif
371a7dea167SDimitry Andric 
372a7dea167SDimitry Andric extern __inline __m128i
373a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16(__m128i __A,__m128i __B)37481ad6265SDimitry Andric     _mm_maddubs_epi16(__m128i __A, __m128i __B) {
375a7dea167SDimitry Andric   __v8hi __unsigned = vec_splats((signed short)0x00ff);
376a7dea167SDimitry Andric   __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
377a7dea167SDimitry Andric   __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
378a7dea167SDimitry Andric   __v8hi __E = vec_unpackh((__v16qi)__B);
379a7dea167SDimitry Andric   __v8hi __F = vec_unpackl((__v16qi)__B);
380a7dea167SDimitry Andric   __C = vec_mul(__C, __E);
381a7dea167SDimitry Andric   __D = vec_mul(__D, __F);
38281ad6265SDimitry Andric   const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
38381ad6265SDimitry Andric                           16, 17, 20, 21, 24, 25, 28, 29};
38481ad6265SDimitry Andric   const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
38581ad6265SDimitry Andric                            18, 19, 22, 23, 26, 27, 30, 31};
386a7dea167SDimitry Andric   __E = vec_perm(__C, __D, __odds);
387a7dea167SDimitry Andric   __F = vec_perm(__C, __D, __evens);
388a7dea167SDimitry Andric   return (__m128i)vec_adds(__E, __F);
389a7dea167SDimitry Andric }
390a7dea167SDimitry Andric 
391a7dea167SDimitry Andric extern __inline __m64
392a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16(__m64 __A,__m64 __B)39381ad6265SDimitry Andric     _mm_maddubs_pi16(__m64 __A, __m64 __B) {
394a7dea167SDimitry Andric   __v8hi __C = (__v8hi)(__v2du){__A, __A};
395a7dea167SDimitry Andric   __C = vec_unpackl((__v16qi)__C);
396a7dea167SDimitry Andric   const __v8hi __unsigned = vec_splats((signed short)0x00ff);
397a7dea167SDimitry Andric   __C = vec_and(__C, __unsigned);
398a7dea167SDimitry Andric   __v8hi __D = (__v8hi)(__v2du){__B, __B};
399a7dea167SDimitry Andric   __D = vec_unpackl((__v16qi)__D);
400a7dea167SDimitry Andric   __D = vec_mul(__C, __D);
40181ad6265SDimitry Andric   const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
40281ad6265SDimitry Andric                           16, 17, 20, 21, 24, 25, 28, 29};
40381ad6265SDimitry Andric   const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
40481ad6265SDimitry Andric                            18, 19, 22, 23, 26, 27, 30, 31};
405a7dea167SDimitry Andric   __C = vec_perm(__D, __D, __odds);
406a7dea167SDimitry Andric   __D = vec_perm(__D, __D, __evens);
407a7dea167SDimitry Andric   __C = vec_adds(__C, __D);
408a7dea167SDimitry Andric   return (__m64)((__v2du)(__C))[0];
409a7dea167SDimitry Andric }
410a7dea167SDimitry Andric 
411a7dea167SDimitry Andric extern __inline __m128i
412a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16(__m128i __A,__m128i __B)41381ad6265SDimitry Andric     _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
414a7dea167SDimitry Andric   __v4si __C = vec_unpackh((__v8hi)__A);
415a7dea167SDimitry Andric   __v4si __D = vec_unpackh((__v8hi)__B);
416a7dea167SDimitry Andric   __C = vec_mul(__C, __D);
417a7dea167SDimitry Andric   __D = vec_unpackl((__v8hi)__A);
418a7dea167SDimitry Andric   __v4si __E = vec_unpackl((__v8hi)__B);
419a7dea167SDimitry Andric   __D = vec_mul(__D, __E);
420a7dea167SDimitry Andric   const __v4su __shift = vec_splats((unsigned int)14);
421a7dea167SDimitry Andric   __C = vec_sr(__C, __shift);
422a7dea167SDimitry Andric   __D = vec_sr(__D, __shift);
423a7dea167SDimitry Andric   const __v4si __ones = vec_splats((signed int)1);
424a7dea167SDimitry Andric   __C = vec_add(__C, __ones);
425a7dea167SDimitry Andric   __C = vec_sr(__C, (__v4su)__ones);
426a7dea167SDimitry Andric   __D = vec_add(__D, __ones);
427a7dea167SDimitry Andric   __D = vec_sr(__D, (__v4su)__ones);
428a7dea167SDimitry Andric   return (__m128i)vec_pack(__C, __D);
429a7dea167SDimitry Andric }
430a7dea167SDimitry Andric 
431a7dea167SDimitry Andric extern __inline __m64
432a7dea167SDimitry Andric     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16(__m64 __A,__m64 __B)43381ad6265SDimitry Andric     _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
434a7dea167SDimitry Andric   __v4si __C = (__v4si)(__v2du){__A, __A};
435a7dea167SDimitry Andric   __C = vec_unpackh((__v8hi)__C);
436a7dea167SDimitry Andric   __v4si __D = (__v4si)(__v2du){__B, __B};
437a7dea167SDimitry Andric   __D = vec_unpackh((__v8hi)__D);
438a7dea167SDimitry Andric   __C = vec_mul(__C, __D);
439a7dea167SDimitry Andric   const __v4su __shift = vec_splats((unsigned int)14);
440a7dea167SDimitry Andric   __C = vec_sr(__C, __shift);
441a7dea167SDimitry Andric   const __v4si __ones = vec_splats((signed int)1);
442a7dea167SDimitry Andric   __C = vec_add(__C, __ones);
443a7dea167SDimitry Andric   __C = vec_sr(__C, (__v4su)__ones);
444a7dea167SDimitry Andric   __v8hi __E = vec_pack(__C, __D);
445a7dea167SDimitry Andric   return (__m64)((__v2du)(__E))[0];
446a7dea167SDimitry Andric }
447a7dea167SDimitry Andric 
448a7dea167SDimitry Andric #else
449a7dea167SDimitry Andric #include_next <tmmintrin.h>
450*bdd1243dSDimitry Andric #endif /* defined(__powerpc64__) &&                                            \
451fcaf7f86SDimitry Andric         *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
452a7dea167SDimitry Andric 
453a7dea167SDimitry Andric #endif /* TMMINTRIN_H_ */
454