10b57cec5SDimitry Andric /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 100b57cec5SDimitry Andric #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead." 110b57cec5SDimitry Andric #endif 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef __AVX512VLCDINTRIN_H 140b57cec5SDimitry Andric #define __AVX512VLCDINTRIN_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 17*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 18*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 19*5f757f3fSDimitry Andric __target__("avx512vl,avx512cd,no-evex512"), \ 20*5f757f3fSDimitry Andric __min_vector_width__(128))) 21*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 22*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 23*5f757f3fSDimitry Andric __target__("avx512vl,avx512cd,no-evex512"), \ 24*5f757f3fSDimitry Andric __min_vector_width__(256))) 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 270b57cec5SDimitry Andric _mm_broadcastmb_epi64 (__mmask8 __A) 280b57cec5SDimitry Andric { 290b57cec5SDimitry Andric return (__m128i) _mm_set1_epi64x((long long) __A); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 330b57cec5SDimitry Andric _mm256_broadcastmb_epi64 (__mmask8 __A) 340b57cec5SDimitry Andric { 350b57cec5SDimitry Andric return (__m256i) _mm256_set1_epi64x((long long)__A); 360b57cec5SDimitry Andric } 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 390b57cec5SDimitry Andric _mm_broadcastmw_epi32 (__mmask16 __A) 400b57cec5SDimitry Andric { 410b57cec5SDimitry Andric return (__m128i) _mm_set1_epi32((int)__A); 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 450b57cec5SDimitry Andric _mm256_broadcastmw_epi32 (__mmask16 __A) 460b57cec5SDimitry Andric { 470b57cec5SDimitry Andric return (__m256i) _mm256_set1_epi32((int)__A); 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 520b57cec5SDimitry Andric _mm_conflict_epi64 (__m128i __A) 530b57cec5SDimitry Andric { 540b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 580b57cec5SDimitry Andric _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 590b57cec5SDimitry Andric { 600b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 610b57cec5SDimitry Andric (__v2di)_mm_conflict_epi64(__A), 620b57cec5SDimitry Andric (__v2di)__W); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 660b57cec5SDimitry Andric _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) 670b57cec5SDimitry Andric { 680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 690b57cec5SDimitry Andric (__v2di)_mm_conflict_epi64(__A), 700b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 740b57cec5SDimitry Andric _mm256_conflict_epi64 (__m256i __A) 750b57cec5SDimitry Andric { 760b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 800b57cec5SDimitry Andric _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 810b57cec5SDimitry Andric { 820b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 830b57cec5SDimitry Andric (__v4di)_mm256_conflict_epi64(__A), 840b57cec5SDimitry Andric (__v4di)__W); 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 880b57cec5SDimitry Andric _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) 890b57cec5SDimitry Andric { 900b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 910b57cec5SDimitry Andric (__v4di)_mm256_conflict_epi64(__A), 920b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 960b57cec5SDimitry Andric _mm_conflict_epi32 (__m128i __A) 970b57cec5SDimitry Andric { 980b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1020b57cec5SDimitry Andric _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 1030b57cec5SDimitry Andric { 1040b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1050b57cec5SDimitry Andric (__v4si)_mm_conflict_epi32(__A), 1060b57cec5SDimitry Andric (__v4si)__W); 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1100b57cec5SDimitry Andric _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) 1110b57cec5SDimitry Andric { 1120b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1130b57cec5SDimitry Andric (__v4si)_mm_conflict_epi32(__A), 1140b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1180b57cec5SDimitry Andric _mm256_conflict_epi32 (__m256i __A) 1190b57cec5SDimitry Andric { 1200b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); 1210b57cec5SDimitry Andric } 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1240b57cec5SDimitry Andric _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 1250b57cec5SDimitry Andric { 1260b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1270b57cec5SDimitry Andric (__v8si)_mm256_conflict_epi32(__A), 1280b57cec5SDimitry Andric (__v8si)__W); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1320b57cec5SDimitry Andric _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) 1330b57cec5SDimitry Andric { 1340b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1350b57cec5SDimitry Andric (__v8si)_mm256_conflict_epi32(__A), 1360b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1400b57cec5SDimitry Andric _mm_lzcnt_epi32 (__m128i __A) 1410b57cec5SDimitry Andric { 1420b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1460b57cec5SDimitry Andric _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 1470b57cec5SDimitry Andric { 1480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1490b57cec5SDimitry Andric (__v4si)_mm_lzcnt_epi32(__A), 1500b57cec5SDimitry Andric (__v4si)__W); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1540b57cec5SDimitry Andric _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) 1550b57cec5SDimitry Andric { 1560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1570b57cec5SDimitry Andric (__v4si)_mm_lzcnt_epi32(__A), 1580b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1620b57cec5SDimitry Andric _mm256_lzcnt_epi32 (__m256i __A) 1630b57cec5SDimitry Andric { 1640b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1680b57cec5SDimitry Andric _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 1690b57cec5SDimitry Andric { 1700b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1710b57cec5SDimitry Andric (__v8si)_mm256_lzcnt_epi32(__A), 1720b57cec5SDimitry Andric (__v8si)__W); 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1760b57cec5SDimitry Andric _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) 1770b57cec5SDimitry Andric { 1780b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1790b57cec5SDimitry Andric (__v8si)_mm256_lzcnt_epi32(__A), 1800b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1840b57cec5SDimitry Andric _mm_lzcnt_epi64 (__m128i __A) 1850b57cec5SDimitry Andric { 1860b57cec5SDimitry Andric return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1900b57cec5SDimitry Andric _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 1910b57cec5SDimitry Andric { 1920b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1930b57cec5SDimitry Andric (__v2di)_mm_lzcnt_epi64(__A), 1940b57cec5SDimitry Andric (__v2di)__W); 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1980b57cec5SDimitry Andric _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) 1990b57cec5SDimitry Andric { 2000b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 2010b57cec5SDimitry Andric (__v2di)_mm_lzcnt_epi64(__A), 2020b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2060b57cec5SDimitry Andric _mm256_lzcnt_epi64 (__m256i __A) 2070b57cec5SDimitry Andric { 2080b57cec5SDimitry Andric return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2120b57cec5SDimitry Andric _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 2130b57cec5SDimitry Andric { 2140b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2150b57cec5SDimitry Andric (__v4di)_mm256_lzcnt_epi64(__A), 2160b57cec5SDimitry Andric (__v4di)__W); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2200b57cec5SDimitry Andric _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) 2210b57cec5SDimitry Andric { 2220b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2230b57cec5SDimitry Andric (__v4di)_mm256_lzcnt_epi64(__A), 2240b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 2280b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric #endif /* __AVX512VLCDINTRIN_H */ 231