10b57cec5SDimitry Andric /*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error \ 120b57cec5SDimitry Andric "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead." 130b57cec5SDimitry Andric #endif 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #ifndef __AVX512VPOPCNTDQVLINTRIN_H 160b57cec5SDimitry Andric #define __AVX512VPOPCNTDQVLINTRIN_H 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 190b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 20*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 21*5f757f3fSDimitry Andric __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ 22*5f757f3fSDimitry Andric __min_vector_width__(128))) 230b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 24*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 25*5f757f3fSDimitry Andric __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ 26*5f757f3fSDimitry Andric __min_vector_width__(256))) 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 290b57cec5SDimitry Andric _mm_popcnt_epi64(__m128i __A) { 300b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); 310b57cec5SDimitry Andric } 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 340b57cec5SDimitry Andric _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { 350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128( 360b57cec5SDimitry Andric (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 400b57cec5SDimitry Andric _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { 410b57cec5SDimitry Andric return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 450b57cec5SDimitry Andric _mm_popcnt_epi32(__m128i __A) { 460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 500b57cec5SDimitry Andric _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128( 520b57cec5SDimitry Andric (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 560b57cec5SDimitry Andric _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { 570b57cec5SDimitry Andric return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 610b57cec5SDimitry Andric _mm256_popcnt_epi64(__m256i __A) { 620b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 660b57cec5SDimitry Andric _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { 670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256( 680b57cec5SDimitry Andric (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 720b57cec5SDimitry Andric _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { 730b57cec5SDimitry Andric return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 770b57cec5SDimitry Andric _mm256_popcnt_epi32(__m256i __A) { 780b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 820b57cec5SDimitry Andric _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256( 840b57cec5SDimitry Andric (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 880b57cec5SDimitry Andric _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { 890b57cec5SDimitry Andric return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 930b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric #endif 96