10b57cec5SDimitry Andric /*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512BITALGINTRIN_H 150b57cec5SDimitry Andric #define __AVX512BITALGINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512bitalg,evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(512))) 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 240b57cec5SDimitry Andric _mm512_popcnt_epi16(__m512i __A) 250b57cec5SDimitry Andric { 260b57cec5SDimitry Andric return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); 270b57cec5SDimitry Andric } 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 300b57cec5SDimitry Andric _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) 310b57cec5SDimitry Andric { 320b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, 330b57cec5SDimitry Andric (__v32hi) _mm512_popcnt_epi16(__B), 340b57cec5SDimitry Andric (__v32hi) __A); 350b57cec5SDimitry Andric } 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 380b57cec5SDimitry Andric _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) 390b57cec5SDimitry Andric { 400b57cec5SDimitry Andric return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), 410b57cec5SDimitry Andric __U, 420b57cec5SDimitry Andric __B); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 460b57cec5SDimitry Andric _mm512_popcnt_epi8(__m512i __A) 470b57cec5SDimitry Andric { 480b57cec5SDimitry Andric return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 520b57cec5SDimitry Andric _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) 530b57cec5SDimitry Andric { 540b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, 550b57cec5SDimitry Andric (__v64qi) _mm512_popcnt_epi8(__B), 560b57cec5SDimitry Andric (__v64qi) __A); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 600b57cec5SDimitry Andric _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) 610b57cec5SDimitry Andric { 620b57cec5SDimitry Andric return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), 630b57cec5SDimitry Andric __U, 640b57cec5SDimitry Andric __B); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS 680b57cec5SDimitry Andric _mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) 690b57cec5SDimitry Andric { 700b57cec5SDimitry Andric return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, 710b57cec5SDimitry Andric (__v64qi) __B, 720b57cec5SDimitry Andric __U); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS 760b57cec5SDimitry Andric _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) 770b57cec5SDimitry Andric { 780b57cec5SDimitry Andric return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, 790b57cec5SDimitry Andric __A, 800b57cec5SDimitry Andric __B); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric #endif 87