/* xref: /freebsd/contrib/llvm-project/clang/lib/Headers/avx512vnniintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad) */
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
110b57cec5SDimitry Andric #error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
120b57cec5SDimitry Andric #endif
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef __AVX512VNNIINTRIN_H
150b57cec5SDimitry Andric #define __AVX512VNNIINTRIN_H
160b57cec5SDimitry Andric 
/* Define the default attributes for the functions in this file: each
 * intrinsic is marked __always_inline__ and __nodebug__, is compiled for the
 * "avx512vnni,evex512" target features, and declares a minimum vector width
 * of 512. The macro is #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vnni,evex512"), __min_vector_width__(512)))

220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
230b57cec5SDimitry Andric _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
240b57cec5SDimitry Andric {
250b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A,
260b57cec5SDimitry Andric                                              (__v16si)__B);
270b57cec5SDimitry Andric }
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
300b57cec5SDimitry Andric _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
310b57cec5SDimitry Andric {
320b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
330b57cec5SDimitry Andric                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
340b57cec5SDimitry Andric                                     (__v16si)__S);
350b57cec5SDimitry Andric }
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
380b57cec5SDimitry Andric _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
390b57cec5SDimitry Andric {
400b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
410b57cec5SDimitry Andric                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
420b57cec5SDimitry Andric                                     (__v16si)_mm512_setzero_si512());
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
460b57cec5SDimitry Andric _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
470b57cec5SDimitry Andric {
480b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A,
490b57cec5SDimitry Andric                                               (__v16si)__B);
500b57cec5SDimitry Andric }
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
530b57cec5SDimitry Andric _mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
540b57cec5SDimitry Andric {
550b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
560b57cec5SDimitry Andric                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
570b57cec5SDimitry Andric                                    (__v16si)__S);
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
610b57cec5SDimitry Andric _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
620b57cec5SDimitry Andric {
630b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
640b57cec5SDimitry Andric                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
650b57cec5SDimitry Andric                                    (__v16si)_mm512_setzero_si512());
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
690b57cec5SDimitry Andric _mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
700b57cec5SDimitry Andric {
710b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
720b57cec5SDimitry Andric                                              (__v16si)__B);
730b57cec5SDimitry Andric }
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
760b57cec5SDimitry Andric _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
770b57cec5SDimitry Andric {
780b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
790b57cec5SDimitry Andric                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
800b57cec5SDimitry Andric                                     (__v16si)__S);
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
840b57cec5SDimitry Andric _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
850b57cec5SDimitry Andric {
860b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
870b57cec5SDimitry Andric                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
880b57cec5SDimitry Andric                                     (__v16si)_mm512_setzero_si512());
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
920b57cec5SDimitry Andric _mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
930b57cec5SDimitry Andric {
940b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
950b57cec5SDimitry Andric                                               (__v16si)__B);
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
990b57cec5SDimitry Andric _mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
1000b57cec5SDimitry Andric {
1010b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
1020b57cec5SDimitry Andric                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
1030b57cec5SDimitry Andric                                    (__v16si)__S);
1040b57cec5SDimitry Andric }
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS
1070b57cec5SDimitry Andric _mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
1080b57cec5SDimitry Andric {
1090b57cec5SDimitry Andric   return (__m512i)__builtin_ia32_selectd_512(__U,
1100b57cec5SDimitry Andric                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
1110b57cec5SDimitry Andric                                    (__v16si)_mm512_setzero_si512());
1120b57cec5SDimitry Andric }
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric #endif
117