10b57cec5SDimitry Andric /*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __VBMIINTRIN_H 150b57cec5SDimitry Andric #define __VBMIINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vbmi,evex512"), __min_vector_width__(512))) 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 230b57cec5SDimitry Andric _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) 240b57cec5SDimitry Andric { 250b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, 260b57cec5SDimitry Andric (__v64qi) __B); 270b57cec5SDimitry Andric } 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 300b57cec5SDimitry Andric _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, 310b57cec5SDimitry Andric __m512i __B) 320b57cec5SDimitry Andric { 330b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512(__U, 340b57cec5SDimitry Andric (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), 350b57cec5SDimitry Andric (__v64qi)__A); 360b57cec5SDimitry Andric } 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 390b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, 400b57cec5SDimitry Andric __m512i __B) 410b57cec5SDimitry Andric { 420b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512(__U, 430b57cec5SDimitry Andric (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), 440b57cec5SDimitry Andric (__v64qi)__I); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 480b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, 490b57cec5SDimitry Andric __m512i __B) 500b57cec5SDimitry Andric { 510b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512(__U, 520b57cec5SDimitry Andric (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), 530b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 570b57cec5SDimitry Andric _mm512_permutexvar_epi8 (__m512i __A, __m512i __B) 580b57cec5SDimitry Andric { 590b57cec5SDimitry Andric return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 630b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, 640b57cec5SDimitry Andric __m512i __B) 650b57cec5SDimitry Andric { 660b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 670b57cec5SDimitry Andric (__v64qi)_mm512_permutexvar_epi8(__A, __B), 680b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 720b57cec5SDimitry Andric _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, 730b57cec5SDimitry Andric __m512i __B) 740b57cec5SDimitry Andric { 750b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 760b57cec5SDimitry Andric (__v64qi)_mm512_permutexvar_epi8(__A, __B), 770b57cec5SDimitry Andric (__v64qi)__W); 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 810b57cec5SDimitry Andric _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) 820b57cec5SDimitry Andric { 830b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y); 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 870b57cec5SDimitry Andric _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, 880b57cec5SDimitry Andric __m512i __Y) 890b57cec5SDimitry Andric { 900b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 910b57cec5SDimitry Andric (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), 920b57cec5SDimitry Andric (__v64qi)__W); 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS 960b57cec5SDimitry Andric _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) 970b57cec5SDimitry Andric { 980b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 990b57cec5SDimitry Andric (__v64qi)_mm512_multishift_epi64_epi8(__X, __Y), 1000b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric #endif 107