10b57cec5SDimitry Andric /*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VLBITALGINTRIN_H 150b57cec5SDimitry Andric #define __AVX512VLBITALGINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vl,avx512bitalg,no-evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(128))) 22*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 23*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 24*5f757f3fSDimitry Andric __target__("avx512vl,avx512bitalg,no-evex512"), \ 25*5f757f3fSDimitry Andric __min_vector_width__(256))) 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 280b57cec5SDimitry Andric _mm256_popcnt_epi16(__m256i __A) 290b57cec5SDimitry Andric { 300b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); 310b57cec5SDimitry Andric } 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 340b57cec5SDimitry Andric _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) 350b57cec5SDimitry Andric { 360b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, 370b57cec5SDimitry Andric (__v16hi) _mm256_popcnt_epi16(__B), 380b57cec5SDimitry Andric (__v16hi) __A); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 420b57cec5SDimitry Andric _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) 430b57cec5SDimitry Andric { 440b57cec5SDimitry Andric return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), 450b57cec5SDimitry Andric __U, 460b57cec5SDimitry Andric __B); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 500b57cec5SDimitry Andric _mm_popcnt_epi16(__m128i __A) 510b57cec5SDimitry Andric { 520b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 560b57cec5SDimitry Andric _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) 570b57cec5SDimitry Andric { 580b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, 590b57cec5SDimitry Andric (__v8hi) _mm_popcnt_epi16(__B), 600b57cec5SDimitry Andric (__v8hi) __A); 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 640b57cec5SDimitry Andric _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) 650b57cec5SDimitry Andric { 660b57cec5SDimitry Andric return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), 670b57cec5SDimitry Andric __U, 680b57cec5SDimitry Andric __B); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 720b57cec5SDimitry Andric _mm256_popcnt_epi8(__m256i __A) 730b57cec5SDimitry Andric { 740b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 780b57cec5SDimitry Andric _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) 790b57cec5SDimitry Andric { 800b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, 810b57cec5SDimitry Andric (__v32qi) _mm256_popcnt_epi8(__B), 820b57cec5SDimitry Andric (__v32qi) __A); 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 860b57cec5SDimitry Andric _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) 870b57cec5SDimitry Andric { 880b57cec5SDimitry Andric return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), 890b57cec5SDimitry Andric __U, 900b57cec5SDimitry Andric __B); 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 940b57cec5SDimitry Andric _mm_popcnt_epi8(__m128i __A) 950b57cec5SDimitry Andric { 960b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1000b57cec5SDimitry Andric _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) 1010b57cec5SDimitry Andric { 1020b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, 1030b57cec5SDimitry Andric (__v16qi) _mm_popcnt_epi8(__B), 1040b57cec5SDimitry Andric (__v16qi) __A); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1080b57cec5SDimitry Andric _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) 1090b57cec5SDimitry Andric { 1100b57cec5SDimitry Andric return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), 1110b57cec5SDimitry Andric __U, 1120b57cec5SDimitry Andric __B); 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 1160b57cec5SDimitry Andric _mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) 1170b57cec5SDimitry Andric { 1180b57cec5SDimitry Andric return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, 1190b57cec5SDimitry Andric (__v32qi) __B, 1200b57cec5SDimitry Andric __U); 1210b57cec5SDimitry Andric } 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 1240b57cec5SDimitry Andric _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) 1250b57cec5SDimitry Andric { 1260b57cec5SDimitry Andric return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, 1270b57cec5SDimitry Andric __A, 1280b57cec5SDimitry Andric __B); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 1320b57cec5SDimitry Andric _mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) 1330b57cec5SDimitry Andric { 1340b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, 1350b57cec5SDimitry Andric (__v16qi) __B, 1360b57cec5SDimitry Andric __U); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 1400b57cec5SDimitry Andric _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) 1410b57cec5SDimitry Andric { 1420b57cec5SDimitry Andric return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, 1430b57cec5SDimitry Andric __A, 1440b57cec5SDimitry Andric __B); 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 1490b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric #endif 152