10b57cec5SDimitry Andric /*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VNNIINTRIN_H 150b57cec5SDimitry Andric #define __AVX512VNNIINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vnni,evex512"), __min_vector_width__(512))) 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 230b57cec5SDimitry Andric _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) 240b57cec5SDimitry Andric { 250b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, 260b57cec5SDimitry Andric (__v16si)__B); 270b57cec5SDimitry Andric } 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 300b57cec5SDimitry Andric _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 310b57cec5SDimitry Andric { 320b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 330b57cec5SDimitry Andric (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), 340b57cec5SDimitry Andric (__v16si)__S); 350b57cec5SDimitry Andric } 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 380b57cec5SDimitry Andric _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 390b57cec5SDimitry Andric { 400b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 410b57cec5SDimitry Andric (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), 420b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 460b57cec5SDimitry Andric _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) 470b57cec5SDimitry Andric { 480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, 490b57cec5SDimitry Andric (__v16si)__B); 500b57cec5SDimitry Andric } 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 530b57cec5SDimitry Andric _mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 540b57cec5SDimitry Andric { 550b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 560b57cec5SDimitry Andric (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), 570b57cec5SDimitry Andric (__v16si)__S); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 610b57cec5SDimitry Andric _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 620b57cec5SDimitry Andric { 630b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 640b57cec5SDimitry Andric (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), 650b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 690b57cec5SDimitry Andric _mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) 700b57cec5SDimitry Andric { 710b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A, 720b57cec5SDimitry Andric (__v16si)__B); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 760b57cec5SDimitry Andric _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 770b57cec5SDimitry Andric { 780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 790b57cec5SDimitry Andric (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), 800b57cec5SDimitry Andric (__v16si)__S); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 840b57cec5SDimitry Andric _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 850b57cec5SDimitry Andric { 860b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 870b57cec5SDimitry Andric (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), 880b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 920b57cec5SDimitry Andric _mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) 930b57cec5SDimitry Andric { 940b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A, 950b57cec5SDimitry Andric (__v16si)__B); 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 990b57cec5SDimitry Andric _mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 1000b57cec5SDimitry Andric { 1010b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 1020b57cec5SDimitry Andric (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), 1030b57cec5SDimitry Andric (__v16si)__S); 1040b57cec5SDimitry Andric } 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 1070b57cec5SDimitry Andric _mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 1080b57cec5SDimitry Andric { 1090b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 1100b57cec5SDimitry Andric (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), 1110b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric #endif 117