/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.

   In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
   is a good match for most SIMD operations.  However, the horizontal
   add/sub operations require the data pairs to be permuted into separate
   registers with vertical even/odd alignment for the operation.
   And the addsub operations require the sign of only the even-numbered
   elements to be flipped (xored with -0.0).  An illustrative sketch
   follows this guard.
   For larger blocks of code using these intrinsic implementations,
   the compiler should be able to schedule instructions to avoid
   additional latency.

   In the specific case of the monitor and mwait instructions there is
   no direct equivalent in the PowerISA at this time, so those
   intrinsics are not implemented.  */
#error                                                                         \
    "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this warning."
#endif
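
/* Illustrative sketch (an assumption for exposition, not part of this
   header): a common SSE3 use case is multiplying packed complex floats,
   which shows how the addsub and duplicate intrinsics below fit together.
   Assuming __a and __b each hold two complex numbers as {re0, im0, re1,
   im1}, a hypothetical caller might write:

     __m128 __br = _mm_moveldup_ps(__b);            // {b.re0, b.re0, b.re1, b.re1}
     __m128 __bi = _mm_movehdup_ps(__b);            // {b.im0, b.im0, b.im1, b.im1}
     __m128 __t1 = _mm_mul_ps(__a, __br);           // a scaled by b.re
     __m128 __sw = _mm_shuffle_ps(__a, __a,
                                  _MM_SHUFFLE(2, 3, 0, 1)); // swap re/im in each pair
     __m128 __t2 = _mm_mul_ps(__sw, __bi);          // swapped a scaled by b.im
     __m128 __r  = _mm_addsub_ps(__t1, __t2);       // complex products, interleaved

   The even lanes of __r receive a.re * b.re - a.im * b.im and the odd
   lanes a.im * b.re + a.re * b.im.  On PowerPC this compiles to the
   permute/xor/add sequences implemented below.  */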

#ifndef PMMINTRIN_H_
#define PMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

/* We need definitions from the SSE2 and SSE header files.  */
#include <emmintrin.h>

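/* _mm_addsub_ps and _mm_addsub_pd subtract in the even-numbered lanes and
   add in the odd-numbered lanes.  Flipping the sign of the even elements
   of __Y (xor with -0.0) turns both cases into a single vector add.  */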
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_addsub_ps(__m128 __X, __m128 __Y) {
  const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
  __v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
  return (__m128)vec_add(__X, __even_neg_Y);
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_addsub_pd(__m128d __X, __m128d __Y) {
  const __v2df __even_n0 = {-0.0, 0.0};
  __v2df __even_neg_Y = vec_xor(__Y, __even_n0);
  return (__m128d)vec_add(__X, __even_neg_Y);
}

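/* _mm_hadd_ps and _mm_hsub_ps operate on adjacent element pairs.  The two
   vec_perm masks below gather the first and the second element of each
   pair from __X and __Y into vertically aligned vectors, so a single
   vec_add or vec_sub produces all four horizontal results.  */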
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_ps(__m128 __X, __m128 __Y) {
  __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
                                     0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
                                     0x18, 0x19, 0x1A, 0x1B};
  __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
                                     0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
                                     0x1C, 0x1D, 0x1E, 0x1F};
  return (__m128)vec_add(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
                         vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_ps(__m128 __X, __m128 __Y) {
  __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
                                     0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
                                     0x18, 0x19, 0x1A, 0x1B};
  __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
                                     0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
                                     0x1C, 0x1D, 0x1E, 0x1F};
  return (__m128)vec_sub(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
                         vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
}

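/* For doubles each input holds only a single pair, so vec_mergeh and
   vec_mergel line up the pair members directly and no permute masks are
   needed.  */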
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pd(__m128d __X, __m128d __Y) {
  return (__m128d)vec_add(vec_mergeh((__v2df)__X, (__v2df)__Y),
                          vec_mergel((__v2df)__X, (__v2df)__Y));
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pd(__m128d __X, __m128d __Y) {
  return (__m128d)vec_sub(vec_mergeh((__v2df)__X, (__v2df)__Y),
                          vec_mergel((__v2df)__X, (__v2df)__Y));
}

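/* _mm_movehdup_ps and _mm_moveldup_ps duplicate the odd- and even-numbered
   elements respectively.  vec_mergeo/vec_mergee lower to the vmrgow/vmrgew
   instructions introduced in ISA 2.07, hence the _ARCH_PWR8 guards.  */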
#ifdef _ARCH_PWR8
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movehdup_ps(__m128 __X) {
  return (__m128)vec_mergeo((__v4su)__X, (__v4su)__X);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_moveldup_ps(__m128 __X) {
  return (__m128)vec_mergee((__v4su)__X, (__v4su)__X);
}
#endif

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loaddup_pd(double const *__P) {
  return (__m128d)vec_splats(*__P);
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movedup_pd(__m128d __X) {
  return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
}

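/* lddqu is simply an unaligned 16-byte load; vec_vsx_ld accepts unaligned
   addresses on VSX targets.  */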
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_lddqu_si128(__m128i const *__P) {
  return (__m128i)(vec_vsx_ld(0, (signed int const *)__P));
}

/* POWER8 / POWER9 have no equivalent for _mm_monitor or _mm_mwait.  */

#else
#include_next <pmmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* PMMINTRIN_H_ */