1349cc55cSDimitry Andric /*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------=== 2349cc55cSDimitry Andric * 3349cc55cSDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric * 7349cc55cSDimitry Andric *===-----------------------------------------------------------------------=== 8349cc55cSDimitry Andric */ 9349cc55cSDimitry Andric #ifndef __IMMINTRIN_H 10349cc55cSDimitry Andric #error \ 11349cc55cSDimitry Andric "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead." 12349cc55cSDimitry Andric #endif 13349cc55cSDimitry Andric 14bdd1243dSDimitry Andric #ifdef __SSE2__ 15bdd1243dSDimitry Andric 16349cc55cSDimitry Andric #ifndef __AVX512VLFP16INTRIN_H 17349cc55cSDimitry Andric #define __AVX512VLFP16INTRIN_H 18349cc55cSDimitry Andric 19349cc55cSDimitry Andric /* Define the default attributes for the functions in this file. */ 20349cc55cSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 21349cc55cSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 22*5f757f3fSDimitry Andric __target__("avx512fp16,avx512vl,no-evex512"), \ 23349cc55cSDimitry Andric __min_vector_width__(256))) 24349cc55cSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 25349cc55cSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 26*5f757f3fSDimitry Andric __target__("avx512fp16,avx512vl,no-evex512"), \ 27349cc55cSDimitry Andric __min_vector_width__(128))) 28349cc55cSDimitry Andric 29349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) { 30349cc55cSDimitry Andric return __a[0]; 31349cc55cSDimitry Andric } 32349cc55cSDimitry Andric 33349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) { 34349cc55cSDimitry Andric return __a[0]; 35349cc55cSDimitry Andric } 36349cc55cSDimitry Andric 37349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { 38349cc55cSDimitry Andric return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; 39349cc55cSDimitry Andric } 40349cc55cSDimitry Andric 41349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) { 42349cc55cSDimitry Andric return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h}; 43349cc55cSDimitry Andric } 44349cc55cSDimitry Andric 45349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) { 46349cc55cSDimitry Andric return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h, 47349cc55cSDimitry Andric __h, __h, __h, __h, __h, __h, __h, __h}; 48349cc55cSDimitry Andric } 49349cc55cSDimitry Andric 50349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128 51349cc55cSDimitry Andric _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, 52349cc55cSDimitry Andric _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { 53349cc55cSDimitry Andric return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; 54349cc55cSDimitry Andric } 55349cc55cSDimitry Andric 56349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 57349cc55cSDimitry Andric _mm256_set1_pch(_Float16 _Complex h) { 58349cc55cSDimitry Andric return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h)); 59349cc55cSDimitry Andric } 60349cc55cSDimitry Andric 61349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128 62349cc55cSDimitry Andric _mm_set1_pch(_Float16 _Complex h) { 63349cc55cSDimitry Andric return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h)); 64349cc55cSDimitry Andric } 65349cc55cSDimitry Andric 66349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 67349cc55cSDimitry Andric _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, 68349cc55cSDimitry Andric _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, 69349cc55cSDimitry Andric _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, 70349cc55cSDimitry Andric _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) { 71349cc55cSDimitry Andric return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11, 72349cc55cSDimitry Andric __h10, __h9, __h8, __h7, __h6, __h5, 73349cc55cSDimitry Andric __h4, __h3, __h2, __h1}; 74349cc55cSDimitry Andric } 75349cc55cSDimitry Andric 76349cc55cSDimitry Andric #define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \ 77349cc55cSDimitry Andric _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1)) 78349cc55cSDimitry Andric 79349cc55cSDimitry Andric #define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ 80349cc55cSDimitry Andric h14, h15, h16) \ 81349cc55cSDimitry Andric _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \ 82349cc55cSDimitry Andric (h7), (h6), (h5), (h4), (h3), (h2), (h1)) 83349cc55cSDimitry Andric 84349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, 85349cc55cSDimitry Andric __m256h __B) { 86349cc55cSDimitry Andric return (__m256h)((__v16hf)__A + (__v16hf)__B); 87349cc55cSDimitry Andric } 88349cc55cSDimitry Andric 89349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 90349cc55cSDimitry Andric _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 91349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 92349cc55cSDimitry Andric __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W); 93349cc55cSDimitry Andric } 94349cc55cSDimitry Andric 95349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 96349cc55cSDimitry Andric _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) { 97349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 98349cc55cSDimitry Andric __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); 99349cc55cSDimitry Andric } 100349cc55cSDimitry Andric 101349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A, 102349cc55cSDimitry Andric __m128h __B) { 103349cc55cSDimitry Andric return (__m128h)((__v8hf)__A + (__v8hf)__B); 104349cc55cSDimitry Andric } 105349cc55cSDimitry Andric 106349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W, 107349cc55cSDimitry Andric __mmask8 __U, 108349cc55cSDimitry Andric __m128h __A, 109349cc55cSDimitry Andric __m128h __B) { 110349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), 111349cc55cSDimitry Andric (__v8hf)__W); 112349cc55cSDimitry Andric } 113349cc55cSDimitry Andric 114349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U, 115349cc55cSDimitry Andric __m128h __A, 116349cc55cSDimitry Andric __m128h __B) { 117349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), 118349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 119349cc55cSDimitry Andric } 120349cc55cSDimitry Andric 121349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A, 122349cc55cSDimitry Andric __m256h __B) { 123349cc55cSDimitry Andric return (__m256h)((__v16hf)__A - (__v16hf)__B); 124349cc55cSDimitry Andric } 125349cc55cSDimitry Andric 126349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 127349cc55cSDimitry Andric _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 128349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 129349cc55cSDimitry Andric __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W); 130349cc55cSDimitry Andric } 131349cc55cSDimitry Andric 132349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 133349cc55cSDimitry Andric _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) { 134349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 135349cc55cSDimitry Andric __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); 136349cc55cSDimitry Andric } 137349cc55cSDimitry Andric 138349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A, 139349cc55cSDimitry Andric __m128h __B) { 140349cc55cSDimitry Andric return (__m128h)((__v8hf)__A - (__v8hf)__B); 141349cc55cSDimitry Andric } 142349cc55cSDimitry Andric 143349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W, 144349cc55cSDimitry Andric __mmask8 __U, 145349cc55cSDimitry Andric __m128h __A, 146349cc55cSDimitry Andric __m128h __B) { 147349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), 148349cc55cSDimitry Andric (__v8hf)__W); 149349cc55cSDimitry Andric } 150349cc55cSDimitry Andric 151349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U, 152349cc55cSDimitry Andric __m128h __A, 153349cc55cSDimitry Andric __m128h __B) { 154349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), 155349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 156349cc55cSDimitry Andric } 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A, 159349cc55cSDimitry Andric __m256h __B) { 160349cc55cSDimitry Andric return (__m256h)((__v16hf)__A * (__v16hf)__B); 161349cc55cSDimitry Andric } 162349cc55cSDimitry Andric 163349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 164349cc55cSDimitry Andric _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 165349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 166349cc55cSDimitry Andric __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W); 167349cc55cSDimitry Andric } 168349cc55cSDimitry Andric 169349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 170349cc55cSDimitry Andric _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) { 171349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 172349cc55cSDimitry Andric __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); 173349cc55cSDimitry Andric } 174349cc55cSDimitry Andric 175349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A, 176349cc55cSDimitry Andric __m128h __B) { 177349cc55cSDimitry Andric return (__m128h)((__v8hf)__A * (__v8hf)__B); 178349cc55cSDimitry Andric } 179349cc55cSDimitry Andric 180349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W, 181349cc55cSDimitry Andric __mmask8 __U, 182349cc55cSDimitry Andric __m128h __A, 183349cc55cSDimitry Andric __m128h __B) { 184349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), 185349cc55cSDimitry Andric (__v8hf)__W); 186349cc55cSDimitry Andric } 187349cc55cSDimitry Andric 188349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U, 189349cc55cSDimitry Andric __m128h __A, 190349cc55cSDimitry Andric __m128h __B) { 191349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), 192349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 193349cc55cSDimitry Andric } 194349cc55cSDimitry Andric 195349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A, 196349cc55cSDimitry Andric __m256h __B) { 197349cc55cSDimitry Andric return (__m256h)((__v16hf)__A / (__v16hf)__B); 198349cc55cSDimitry Andric } 199349cc55cSDimitry Andric 200349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 201349cc55cSDimitry Andric _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 202349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 203349cc55cSDimitry Andric __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W); 204349cc55cSDimitry Andric } 205349cc55cSDimitry Andric 206349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 207349cc55cSDimitry Andric _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) { 208349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 209349cc55cSDimitry Andric __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); 210349cc55cSDimitry Andric } 211349cc55cSDimitry Andric 212349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A, 213349cc55cSDimitry Andric __m128h __B) { 214349cc55cSDimitry Andric return (__m128h)((__v8hf)__A / (__v8hf)__B); 215349cc55cSDimitry Andric } 216349cc55cSDimitry Andric 217349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W, 218349cc55cSDimitry Andric __mmask8 __U, 219349cc55cSDimitry Andric __m128h __A, 220349cc55cSDimitry Andric __m128h __B) { 221349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), 222349cc55cSDimitry Andric (__v8hf)__W); 223349cc55cSDimitry Andric } 224349cc55cSDimitry Andric 225349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, 226349cc55cSDimitry Andric __m128h __A, 227349cc55cSDimitry Andric __m128h __B) { 228349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), 229349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 230349cc55cSDimitry Andric } 231349cc55cSDimitry Andric 232349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, 233349cc55cSDimitry Andric __m256h __B) { 234349cc55cSDimitry Andric return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); 235349cc55cSDimitry Andric } 236349cc55cSDimitry Andric 237349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 238349cc55cSDimitry Andric _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 239349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 240349cc55cSDimitry Andric (__mmask16)__U, 241349cc55cSDimitry Andric (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), 242349cc55cSDimitry Andric (__v16hf)__W); 243349cc55cSDimitry Andric } 244349cc55cSDimitry Andric 245349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 246349cc55cSDimitry Andric _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { 247349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 248349cc55cSDimitry Andric (__mmask16)__U, 249349cc55cSDimitry Andric (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), 250349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 251349cc55cSDimitry Andric } 252349cc55cSDimitry Andric 253349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, 254349cc55cSDimitry Andric __m128h __B) { 255349cc55cSDimitry Andric return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); 256349cc55cSDimitry Andric } 257349cc55cSDimitry Andric 258349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, 259349cc55cSDimitry Andric __mmask8 __U, 260349cc55cSDimitry Andric __m128h __A, 261349cc55cSDimitry Andric __m128h __B) { 262349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 263349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), 264349cc55cSDimitry Andric (__v8hf)__W); 265349cc55cSDimitry Andric } 266349cc55cSDimitry Andric 267349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, 268349cc55cSDimitry Andric __m128h __A, 269349cc55cSDimitry Andric __m128h __B) { 270349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 271349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), 272349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 273349cc55cSDimitry Andric } 274349cc55cSDimitry Andric 275349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, 276349cc55cSDimitry Andric __m256h __B) { 277349cc55cSDimitry Andric return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); 278349cc55cSDimitry Andric } 279349cc55cSDimitry Andric 280349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 281349cc55cSDimitry Andric _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 282349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 283349cc55cSDimitry Andric (__mmask16)__U, 284349cc55cSDimitry Andric (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), 285349cc55cSDimitry Andric (__v16hf)__W); 286349cc55cSDimitry Andric } 287349cc55cSDimitry Andric 288349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 289349cc55cSDimitry Andric _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { 290349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 291349cc55cSDimitry Andric (__mmask16)__U, 292349cc55cSDimitry Andric (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), 293349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 294349cc55cSDimitry Andric } 295349cc55cSDimitry Andric 296349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, 297349cc55cSDimitry Andric __m128h __B) { 298349cc55cSDimitry Andric return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); 299349cc55cSDimitry Andric } 300349cc55cSDimitry Andric 301349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, 302349cc55cSDimitry Andric __mmask8 __U, 303349cc55cSDimitry Andric __m128h __A, 304349cc55cSDimitry Andric __m128h __B) { 305349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 306349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), 307349cc55cSDimitry Andric (__v8hf)__W); 308349cc55cSDimitry Andric } 309349cc55cSDimitry Andric 310349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, 311349cc55cSDimitry Andric __m128h __A, 312349cc55cSDimitry Andric __m128h __B) { 313349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 314349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), 315349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 316349cc55cSDimitry Andric } 317349cc55cSDimitry Andric 318349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) { 319349cc55cSDimitry Andric return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A); 320349cc55cSDimitry Andric } 321349cc55cSDimitry Andric 322349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) { 323349cc55cSDimitry Andric return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); 324349cc55cSDimitry Andric } 325349cc55cSDimitry Andric 326349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) { 327349cc55cSDimitry Andric return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f)); 328349cc55cSDimitry Andric } 329349cc55cSDimitry Andric 330349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 331349cc55cSDimitry Andric _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) { 332349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectps_256( 333349cc55cSDimitry Andric (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W); 334349cc55cSDimitry Andric } 335349cc55cSDimitry Andric 336349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 337349cc55cSDimitry Andric _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) { 338349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectps_256( 339349cc55cSDimitry Andric (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps()); 340349cc55cSDimitry Andric } 341349cc55cSDimitry Andric 342349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) { 343349cc55cSDimitry Andric return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f)); 344349cc55cSDimitry Andric } 345349cc55cSDimitry Andric 346349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W, 347349cc55cSDimitry Andric __mmask8 __U, 348349cc55cSDimitry Andric __m128h __A) { 349349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectps_128( 350349cc55cSDimitry Andric (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W); 351349cc55cSDimitry Andric } 352349cc55cSDimitry Andric 353349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 354349cc55cSDimitry Andric _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) { 355349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectps_128( 356349cc55cSDimitry Andric (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps()); 357349cc55cSDimitry Andric } 358349cc55cSDimitry Andric 359349cc55cSDimitry Andric #define _mm256_cmp_ph_mask(a, b, p) \ 360349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpph256_mask( \ 361349cc55cSDimitry Andric (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1)) 362349cc55cSDimitry Andric 363349cc55cSDimitry Andric #define _mm256_mask_cmp_ph_mask(m, a, b, p) \ 364349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpph256_mask( \ 365349cc55cSDimitry Andric (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m))) 366349cc55cSDimitry Andric 367349cc55cSDimitry Andric #define _mm_cmp_ph_mask(a, b, p) \ 368349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpph128_mask( \ 369349cc55cSDimitry Andric (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1)) 370349cc55cSDimitry Andric 371349cc55cSDimitry Andric #define _mm_mask_cmp_ph_mask(m, a, b, p) \ 372349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpph128_mask( \ 373349cc55cSDimitry Andric (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m))) 374349cc55cSDimitry Andric 375349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) { 376349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rcpph256_mask( 377349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); 378349cc55cSDimitry Andric } 379349cc55cSDimitry Andric 380349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 381349cc55cSDimitry Andric _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) { 382349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W, 383349cc55cSDimitry Andric (__mmask16)__U); 384349cc55cSDimitry Andric } 385349cc55cSDimitry Andric 386349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 387349cc55cSDimitry Andric _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) { 388349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rcpph256_mask( 389349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); 390349cc55cSDimitry Andric } 391349cc55cSDimitry Andric 392349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) { 393349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rcpph128_mask( 394349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 395349cc55cSDimitry Andric } 396349cc55cSDimitry Andric 397349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W, 398349cc55cSDimitry Andric __mmask8 __U, 399349cc55cSDimitry Andric __m128h __A) { 400349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W, 401349cc55cSDimitry Andric (__mmask8)__U); 402349cc55cSDimitry Andric } 403349cc55cSDimitry Andric 404349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U, 405349cc55cSDimitry Andric __m128h __A) { 406349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rcpph128_mask( 407349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 408349cc55cSDimitry Andric } 409349cc55cSDimitry Andric 410349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) { 411349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rsqrtph256_mask( 412349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); 413349cc55cSDimitry Andric } 414349cc55cSDimitry Andric 415349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 416349cc55cSDimitry Andric _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { 417349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W, 418349cc55cSDimitry Andric (__mmask16)__U); 419349cc55cSDimitry Andric } 420349cc55cSDimitry Andric 421349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 422349cc55cSDimitry Andric _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) { 423349cc55cSDimitry Andric return (__m256h)__builtin_ia32_rsqrtph256_mask( 424349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); 425349cc55cSDimitry Andric } 426349cc55cSDimitry Andric 427349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) { 428349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rsqrtph128_mask( 429349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 430349cc55cSDimitry Andric } 431349cc55cSDimitry Andric 432349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W, 433349cc55cSDimitry Andric __mmask8 __U, 434349cc55cSDimitry Andric __m128h __A) { 435349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W, 436349cc55cSDimitry Andric (__mmask8)__U); 437349cc55cSDimitry Andric } 438349cc55cSDimitry Andric 439349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 440349cc55cSDimitry Andric _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) { 441349cc55cSDimitry Andric return (__m128h)__builtin_ia32_rsqrtph128_mask( 442349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 443349cc55cSDimitry Andric } 444349cc55cSDimitry Andric 445349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) { 446349cc55cSDimitry Andric return (__m128h)__builtin_ia32_getexpph128_mask( 447349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); 448349cc55cSDimitry Andric } 449349cc55cSDimitry Andric 450349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 451349cc55cSDimitry Andric _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) { 452349cc55cSDimitry Andric return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W, 453349cc55cSDimitry Andric (__mmask8)__U); 454349cc55cSDimitry Andric } 455349cc55cSDimitry Andric 456349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 457349cc55cSDimitry Andric _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) { 458349cc55cSDimitry Andric return (__m128h)__builtin_ia32_getexpph128_mask( 459349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 460349cc55cSDimitry Andric } 461349cc55cSDimitry Andric 462349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) { 463349cc55cSDimitry Andric return (__m256h)__builtin_ia32_getexpph256_mask( 464349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); 465349cc55cSDimitry Andric } 466349cc55cSDimitry Andric 467349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 468349cc55cSDimitry Andric _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) { 469349cc55cSDimitry Andric return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W, 470349cc55cSDimitry Andric (__mmask16)__U); 471349cc55cSDimitry Andric } 472349cc55cSDimitry Andric 473349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 474349cc55cSDimitry Andric _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) { 475349cc55cSDimitry Andric return (__m256h)__builtin_ia32_getexpph256_mask( 476349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); 477349cc55cSDimitry Andric } 478349cc55cSDimitry Andric 479349cc55cSDimitry Andric #define _mm_getmant_ph(A, B, C) \ 480349cc55cSDimitry Andric ((__m128h)__builtin_ia32_getmantph128_mask( \ 481349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ 482349cc55cSDimitry Andric (__mmask8)-1)) 483349cc55cSDimitry Andric 484349cc55cSDimitry Andric #define _mm_mask_getmant_ph(W, U, A, B, C) \ 485349cc55cSDimitry Andric ((__m128h)__builtin_ia32_getmantph128_mask( \ 486349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \ 487349cc55cSDimitry Andric (__mmask8)(U))) 488349cc55cSDimitry Andric 489349cc55cSDimitry Andric #define _mm_maskz_getmant_ph(U, A, B, C) \ 490349cc55cSDimitry Andric ((__m128h)__builtin_ia32_getmantph128_mask( \ 491349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ 492349cc55cSDimitry Andric (__mmask8)(U))) 493349cc55cSDimitry Andric 494349cc55cSDimitry Andric #define _mm256_getmant_ph(A, B, C) \ 495349cc55cSDimitry Andric ((__m256h)__builtin_ia32_getmantph256_mask( \ 496349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ 497349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph(), (__mmask16)-1)) 498349cc55cSDimitry Andric 499349cc55cSDimitry Andric #define _mm256_mask_getmant_ph(W, U, A, B, C) \ 500349cc55cSDimitry Andric ((__m256h)__builtin_ia32_getmantph256_mask( \ 501349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ 502349cc55cSDimitry Andric (__mmask16)(U))) 503349cc55cSDimitry Andric 504349cc55cSDimitry Andric #define _mm256_maskz_getmant_ph(U, A, B, C) \ 505349cc55cSDimitry Andric ((__m256h)__builtin_ia32_getmantph256_mask( \ 506349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ 507349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph(), (__mmask16)(U))) 508349cc55cSDimitry Andric 509349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A, 510349cc55cSDimitry Andric __m128h __B) { 511349cc55cSDimitry Andric return (__m128h)__builtin_ia32_scalefph128_mask( 512349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); 513349cc55cSDimitry Andric } 514349cc55cSDimitry Andric 515349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 516349cc55cSDimitry Andric _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { 517349cc55cSDimitry Andric return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B, 518349cc55cSDimitry Andric (__v8hf)__W, (__mmask8)__U); 519349cc55cSDimitry Andric } 520349cc55cSDimitry Andric 521349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 522349cc55cSDimitry Andric _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) { 523349cc55cSDimitry Andric return (__m128h)__builtin_ia32_scalefph128_mask( 524349cc55cSDimitry Andric (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 525349cc55cSDimitry Andric } 526349cc55cSDimitry Andric 527349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A, 528349cc55cSDimitry Andric __m256h __B) { 529349cc55cSDimitry Andric return (__m256h)__builtin_ia32_scalefph256_mask( 530349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); 531349cc55cSDimitry Andric } 532349cc55cSDimitry Andric 533349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 534349cc55cSDimitry Andric _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { 535349cc55cSDimitry Andric return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B, 536349cc55cSDimitry Andric (__v16hf)__W, (__mmask16)__U); 537349cc55cSDimitry Andric } 538349cc55cSDimitry Andric 539349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 540349cc55cSDimitry Andric _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { 541349cc55cSDimitry Andric return (__m256h)__builtin_ia32_scalefph256_mask( 542349cc55cSDimitry Andric (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); 543349cc55cSDimitry Andric } 544349cc55cSDimitry Andric 545349cc55cSDimitry Andric #define _mm_roundscale_ph(A, imm) \ 546349cc55cSDimitry Andric ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ 547349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ 548349cc55cSDimitry Andric (__mmask8)-1)) 549349cc55cSDimitry Andric 550349cc55cSDimitry Andric #define _mm_mask_roundscale_ph(W, U, A, imm) \ 551349cc55cSDimitry Andric ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ 552349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) 553349cc55cSDimitry Andric 554349cc55cSDimitry Andric #define _mm_maskz_roundscale_ph(U, A, imm) \ 555349cc55cSDimitry Andric ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ 556349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ 557349cc55cSDimitry Andric (__mmask8)(U))) 558349cc55cSDimitry Andric 559349cc55cSDimitry Andric #define _mm256_roundscale_ph(A, imm) \ 560349cc55cSDimitry Andric ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ 561349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ 562349cc55cSDimitry Andric (__mmask16)-1)) 563349cc55cSDimitry Andric 564349cc55cSDimitry Andric #define _mm256_mask_roundscale_ph(W, U, A, imm) \ 565349cc55cSDimitry Andric ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ 566349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ 567349cc55cSDimitry Andric (__mmask16)(U))) 568349cc55cSDimitry Andric 569349cc55cSDimitry Andric #define _mm256_maskz_roundscale_ph(U, A, imm) \ 570349cc55cSDimitry Andric ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ 571349cc55cSDimitry Andric (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ 572349cc55cSDimitry Andric (__mmask16)(U))) 573349cc55cSDimitry Andric 574349cc55cSDimitry Andric #define _mm_reduce_ph(A, imm) \ 575349cc55cSDimitry Andric ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ 576349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph(), \ 577349cc55cSDimitry Andric (__mmask8)-1)) 578349cc55cSDimitry Andric 579349cc55cSDimitry Andric #define _mm_mask_reduce_ph(W, U, A, imm) \ 580349cc55cSDimitry Andric ((__m128h)__builtin_ia32_reduceph128_mask( \ 581349cc55cSDimitry Andric (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) 582349cc55cSDimitry Andric 583349cc55cSDimitry Andric #define _mm_maskz_reduce_ph(U, A, imm) \ 584349cc55cSDimitry Andric ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ 585349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph(), \ 586349cc55cSDimitry Andric (__mmask8)(U))) 587349cc55cSDimitry Andric 588349cc55cSDimitry Andric #define _mm256_reduce_ph(A, imm) \ 589349cc55cSDimitry Andric ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ 590349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph(), \ 591349cc55cSDimitry Andric (__mmask16)-1)) 592349cc55cSDimitry Andric 593349cc55cSDimitry Andric #define _mm256_mask_reduce_ph(W, U, A, imm) \ 594349cc55cSDimitry Andric ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ 595349cc55cSDimitry Andric (__v16hf)(__m256h)(W), \ 596349cc55cSDimitry Andric (__mmask16)(U))) 597349cc55cSDimitry Andric 598349cc55cSDimitry Andric #define _mm256_maskz_reduce_ph(U, A, imm) \ 599349cc55cSDimitry Andric ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ 600349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph(), \ 601349cc55cSDimitry Andric (__mmask16)(U))) 602349cc55cSDimitry Andric 603349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { 604349cc55cSDimitry Andric return __builtin_ia32_sqrtph((__v8hf)__a); 605349cc55cSDimitry Andric } 606349cc55cSDimitry Andric 607349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, 608349cc55cSDimitry Andric __mmask8 __U, 609349cc55cSDimitry Andric __m128h __A) { 610349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 611349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W); 612349cc55cSDimitry Andric } 613349cc55cSDimitry Andric 614349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, 615349cc55cSDimitry Andric __m128h __A) { 616349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 617349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph()); 618349cc55cSDimitry Andric } 619349cc55cSDimitry Andric 620349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { 621349cc55cSDimitry Andric return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); 622349cc55cSDimitry Andric } 623349cc55cSDimitry Andric 624349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 625349cc55cSDimitry Andric _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { 626349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 627349cc55cSDimitry Andric (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W); 628349cc55cSDimitry Andric } 629349cc55cSDimitry Andric 630349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 631349cc55cSDimitry Andric _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) { 632349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, 633349cc55cSDimitry Andric (__v16hf)_mm256_sqrt_ph(__A), 634349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 635349cc55cSDimitry Andric } 636349cc55cSDimitry Andric 637349cc55cSDimitry Andric #define _mm_mask_fpclass_ph_mask(U, A, imm) \ 638349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ 639349cc55cSDimitry Andric (int)(imm), (__mmask8)(U))) 640349cc55cSDimitry Andric 641349cc55cSDimitry Andric #define _mm_fpclass_ph_mask(A, imm) \ 642349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ 643349cc55cSDimitry Andric (int)(imm), (__mmask8)-1)) 644349cc55cSDimitry Andric 645349cc55cSDimitry Andric #define _mm256_mask_fpclass_ph_mask(U, A, imm) \ 646349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ 647349cc55cSDimitry Andric (int)(imm), (__mmask16)(U))) 648349cc55cSDimitry Andric 649349cc55cSDimitry Andric #define _mm256_fpclass_ph_mask(A, imm) \ 650349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ 651349cc55cSDimitry Andric (int)(imm), (__mmask16)-1)) 652349cc55cSDimitry Andric 653349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) { 654349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( 655349cc55cSDimitry Andric (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 656349cc55cSDimitry Andric } 657349cc55cSDimitry Andric 658349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W, 659349cc55cSDimitry Andric __mmask8 __U, 660349cc55cSDimitry Andric __m128d __A) { 661349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W, 662349cc55cSDimitry Andric (__mmask8)__U); 663349cc55cSDimitry Andric } 664349cc55cSDimitry Andric 665349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 666349cc55cSDimitry Andric _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) { 667349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( 668349cc55cSDimitry Andric (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 669349cc55cSDimitry Andric } 670349cc55cSDimitry Andric 671349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) { 672349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( 673349cc55cSDimitry Andric (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 674349cc55cSDimitry Andric } 675349cc55cSDimitry Andric 676349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 677349cc55cSDimitry Andric _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) { 678349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W, 679349cc55cSDimitry Andric (__mmask8)__U); 680349cc55cSDimitry Andric } 681349cc55cSDimitry Andric 682349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 683349cc55cSDimitry Andric _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) { 684349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( 685349cc55cSDimitry Andric (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 686349cc55cSDimitry Andric } 687349cc55cSDimitry Andric 688349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) { 689349cc55cSDimitry Andric return (__m128d)__builtin_ia32_vcvtph2pd128_mask( 690349cc55cSDimitry Andric (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1); 691349cc55cSDimitry Andric } 692349cc55cSDimitry Andric 693349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W, 694349cc55cSDimitry Andric __mmask8 __U, 695349cc55cSDimitry Andric __m128h __A) { 696349cc55cSDimitry Andric return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W, 697349cc55cSDimitry Andric (__mmask8)__U); 698349cc55cSDimitry Andric } 699349cc55cSDimitry Andric 700349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 701349cc55cSDimitry Andric _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { 702349cc55cSDimitry Andric return (__m128d)__builtin_ia32_vcvtph2pd128_mask( 703349cc55cSDimitry Andric (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); 704349cc55cSDimitry Andric } 705349cc55cSDimitry Andric 706349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) { 707349cc55cSDimitry Andric return (__m256d)__builtin_ia32_vcvtph2pd256_mask( 708349cc55cSDimitry Andric (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1); 709349cc55cSDimitry Andric } 710349cc55cSDimitry Andric 711349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 712349cc55cSDimitry Andric _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) { 713349cc55cSDimitry Andric return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W, 714349cc55cSDimitry Andric (__mmask8)__U); 715349cc55cSDimitry Andric } 716349cc55cSDimitry Andric 717349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 718349cc55cSDimitry Andric _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { 719349cc55cSDimitry Andric return (__m256d)__builtin_ia32_vcvtph2pd256_mask( 720349cc55cSDimitry Andric (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); 721349cc55cSDimitry Andric } 722349cc55cSDimitry Andric 723349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) { 724349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2w128_mask( 725349cc55cSDimitry Andric (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); 726349cc55cSDimitry Andric } 727349cc55cSDimitry Andric 728349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 729349cc55cSDimitry Andric _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { 730349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W, 731349cc55cSDimitry Andric (__mmask8)__U); 732349cc55cSDimitry Andric } 733349cc55cSDimitry Andric 734349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 735349cc55cSDimitry Andric _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) { 736349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2w128_mask( 737349cc55cSDimitry Andric (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); 738349cc55cSDimitry Andric } 739349cc55cSDimitry Andric 740349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 741349cc55cSDimitry Andric _mm256_cvtph_epi16(__m256h __A) { 742349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2w256_mask( 743349cc55cSDimitry Andric (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); 744349cc55cSDimitry Andric } 745349cc55cSDimitry Andric 746349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 747349cc55cSDimitry Andric _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { 748349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W, 749349cc55cSDimitry Andric (__mmask16)__U); 750349cc55cSDimitry Andric } 751349cc55cSDimitry Andric 752349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 753349cc55cSDimitry Andric _mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) { 754349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2w256_mask( 755349cc55cSDimitry Andric (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); 756349cc55cSDimitry Andric } 757349cc55cSDimitry Andric 758349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) { 759349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2w128_mask( 760349cc55cSDimitry Andric (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); 761349cc55cSDimitry Andric } 762349cc55cSDimitry Andric 763349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 764349cc55cSDimitry Andric _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { 765349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W, 766349cc55cSDimitry Andric (__mmask8)__U); 767349cc55cSDimitry Andric } 768349cc55cSDimitry Andric 769349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 770349cc55cSDimitry Andric _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) { 771349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2w128_mask( 772349cc55cSDimitry Andric (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); 773349cc55cSDimitry Andric } 774349cc55cSDimitry Andric 775349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 776349cc55cSDimitry Andric _mm256_cvttph_epi16(__m256h __A) { 777349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2w256_mask( 778349cc55cSDimitry Andric (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); 779349cc55cSDimitry Andric } 780349cc55cSDimitry Andric 781349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 782349cc55cSDimitry Andric _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { 783349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W, 784349cc55cSDimitry Andric (__mmask16)__U); 785349cc55cSDimitry Andric } 786349cc55cSDimitry Andric 787349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 788349cc55cSDimitry Andric _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { 789349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2w256_mask( 790349cc55cSDimitry Andric (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); 791349cc55cSDimitry Andric } 792349cc55cSDimitry Andric 793349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { 794349cc55cSDimitry Andric return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); 795349cc55cSDimitry Andric } 796349cc55cSDimitry Andric 797349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 798349cc55cSDimitry Andric _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { 799349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 800349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); 801349cc55cSDimitry Andric } 802349cc55cSDimitry Andric 803349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 804349cc55cSDimitry Andric _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { 805349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 806349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); 807349cc55cSDimitry Andric } 808349cc55cSDimitry Andric 809349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 810349cc55cSDimitry Andric _mm256_cvtepi16_ph(__m256i __A) { 811349cc55cSDimitry Andric return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); 812349cc55cSDimitry Andric } 813349cc55cSDimitry Andric 814349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 815349cc55cSDimitry Andric _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { 816349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 817349cc55cSDimitry Andric (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); 818349cc55cSDimitry Andric } 819349cc55cSDimitry Andric 820349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 821349cc55cSDimitry Andric _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { 822349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, 823349cc55cSDimitry Andric (__v16hf)_mm256_cvtepi16_ph(__A), 824349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 825349cc55cSDimitry Andric } 826349cc55cSDimitry Andric 827349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) { 828349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uw128_mask( 829349cc55cSDimitry Andric (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); 830349cc55cSDimitry Andric } 831349cc55cSDimitry Andric 832349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 833349cc55cSDimitry Andric _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { 834349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W, 835349cc55cSDimitry Andric (__mmask8)__U); 836349cc55cSDimitry Andric } 837349cc55cSDimitry Andric 838349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 839349cc55cSDimitry Andric _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) { 840349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uw128_mask( 841349cc55cSDimitry Andric (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); 842349cc55cSDimitry Andric } 843349cc55cSDimitry Andric 844349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 845349cc55cSDimitry Andric _mm256_cvtph_epu16(__m256h __A) { 846349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uw256_mask( 847349cc55cSDimitry Andric (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); 848349cc55cSDimitry Andric } 849349cc55cSDimitry Andric 850349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 851349cc55cSDimitry Andric _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { 852349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W, 853349cc55cSDimitry Andric (__mmask16)__U); 854349cc55cSDimitry Andric } 855349cc55cSDimitry Andric 856349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 857349cc55cSDimitry Andric _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) { 858349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uw256_mask( 859349cc55cSDimitry Andric (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); 860349cc55cSDimitry Andric } 861349cc55cSDimitry Andric 862349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) { 863349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uw128_mask( 864349cc55cSDimitry Andric (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); 865349cc55cSDimitry Andric } 866349cc55cSDimitry Andric 867349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 868349cc55cSDimitry Andric _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { 869349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W, 870349cc55cSDimitry Andric (__mmask8)__U); 871349cc55cSDimitry Andric } 872349cc55cSDimitry Andric 873349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 874349cc55cSDimitry Andric _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) { 875349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uw128_mask( 876349cc55cSDimitry Andric (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); 877349cc55cSDimitry Andric } 878349cc55cSDimitry Andric 879349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 880349cc55cSDimitry Andric _mm256_cvttph_epu16(__m256h __A) { 881349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uw256_mask( 882349cc55cSDimitry Andric (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); 883349cc55cSDimitry Andric } 884349cc55cSDimitry Andric 885349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 886349cc55cSDimitry Andric _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { 887349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W, 888349cc55cSDimitry Andric (__mmask16)__U); 889349cc55cSDimitry Andric } 890349cc55cSDimitry Andric 891349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 892349cc55cSDimitry Andric _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { 893349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uw256_mask( 894349cc55cSDimitry Andric (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); 895349cc55cSDimitry Andric } 896349cc55cSDimitry Andric 897349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { 898349cc55cSDimitry Andric return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); 899349cc55cSDimitry Andric } 900349cc55cSDimitry Andric 901349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 902349cc55cSDimitry Andric _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { 903349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 904349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); 905349cc55cSDimitry Andric } 906349cc55cSDimitry Andric 907349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 908349cc55cSDimitry Andric _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { 909349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 910349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph()); 911349cc55cSDimitry Andric } 912349cc55cSDimitry Andric 913349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 914349cc55cSDimitry Andric _mm256_cvtepu16_ph(__m256i __A) { 915349cc55cSDimitry Andric return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf); 916349cc55cSDimitry Andric } 917349cc55cSDimitry Andric 918349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 919349cc55cSDimitry Andric _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) { 920349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 921349cc55cSDimitry Andric (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W); 922349cc55cSDimitry Andric } 923349cc55cSDimitry Andric 924349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 925349cc55cSDimitry Andric _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) { 926349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, 927349cc55cSDimitry Andric (__v16hf)_mm256_cvtepu16_ph(__A), 928349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 929349cc55cSDimitry Andric } 930349cc55cSDimitry Andric 931349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) { 932349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2dq128_mask( 933349cc55cSDimitry Andric (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); 934349cc55cSDimitry Andric } 935349cc55cSDimitry Andric 936349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 937349cc55cSDimitry Andric _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { 938349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W, 939349cc55cSDimitry Andric (__mmask8)__U); 940349cc55cSDimitry Andric } 941349cc55cSDimitry Andric 942349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 943349cc55cSDimitry Andric _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { 944349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2dq128_mask( 945349cc55cSDimitry Andric (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); 946349cc55cSDimitry Andric } 947349cc55cSDimitry Andric 948349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 949349cc55cSDimitry Andric _mm256_cvtph_epi32(__m128h __A) { 950349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2dq256_mask( 951349cc55cSDimitry Andric (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); 952349cc55cSDimitry Andric } 953349cc55cSDimitry Andric 954349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 955349cc55cSDimitry Andric _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { 956349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W, 957349cc55cSDimitry Andric (__mmask8)__U); 958349cc55cSDimitry Andric } 959349cc55cSDimitry Andric 960349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 961349cc55cSDimitry Andric _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { 962349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2dq256_mask( 963349cc55cSDimitry Andric (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); 964349cc55cSDimitry Andric } 965349cc55cSDimitry Andric 966349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) { 967349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2udq128_mask( 968349cc55cSDimitry Andric (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); 969349cc55cSDimitry Andric } 970349cc55cSDimitry Andric 971349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 972349cc55cSDimitry Andric _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { 973349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W, 974349cc55cSDimitry Andric (__mmask8)__U); 975349cc55cSDimitry Andric } 976349cc55cSDimitry Andric 977349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 978349cc55cSDimitry Andric _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { 979349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2udq128_mask( 980349cc55cSDimitry Andric (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); 981349cc55cSDimitry Andric } 982349cc55cSDimitry Andric 983349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 984349cc55cSDimitry Andric _mm256_cvtph_epu32(__m128h __A) { 985349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2udq256_mask( 986349cc55cSDimitry Andric (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); 987349cc55cSDimitry Andric } 988349cc55cSDimitry Andric 989349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 990349cc55cSDimitry Andric _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { 991349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W, 992349cc55cSDimitry Andric (__mmask8)__U); 993349cc55cSDimitry Andric } 994349cc55cSDimitry Andric 995349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 996349cc55cSDimitry Andric _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { 997349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2udq256_mask( 998349cc55cSDimitry Andric (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); 999349cc55cSDimitry Andric } 1000349cc55cSDimitry Andric 1001349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) { 1002349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( 1003349cc55cSDimitry Andric (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1004349cc55cSDimitry Andric } 1005349cc55cSDimitry Andric 1006349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1007349cc55cSDimitry Andric _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) { 1008349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W, 1009349cc55cSDimitry Andric (__mmask8)__U); 1010349cc55cSDimitry Andric } 1011349cc55cSDimitry Andric 1012349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1013349cc55cSDimitry Andric _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) { 1014349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( 1015349cc55cSDimitry Andric (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1016349cc55cSDimitry Andric } 1017349cc55cSDimitry Andric 1018349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1019349cc55cSDimitry Andric _mm256_cvtepi32_ph(__m256i __A) { 1020349cc55cSDimitry Andric return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf); 1021349cc55cSDimitry Andric } 1022349cc55cSDimitry Andric 1023349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1024349cc55cSDimitry Andric _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) { 1025349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1026349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W); 1027349cc55cSDimitry Andric } 1028349cc55cSDimitry Andric 1029349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1030349cc55cSDimitry Andric _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) { 1031349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1032349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph()); 1033349cc55cSDimitry Andric } 1034349cc55cSDimitry Andric 1035349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) { 1036349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( 1037349cc55cSDimitry Andric (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1038349cc55cSDimitry Andric } 1039349cc55cSDimitry Andric 1040349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1041349cc55cSDimitry Andric _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) { 1042349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W, 1043349cc55cSDimitry Andric (__mmask8)__U); 1044349cc55cSDimitry Andric } 1045349cc55cSDimitry Andric 1046349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1047349cc55cSDimitry Andric _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) { 1048349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( 1049349cc55cSDimitry Andric (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1050349cc55cSDimitry Andric } 1051349cc55cSDimitry Andric 1052349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1053349cc55cSDimitry Andric _mm256_cvtepu32_ph(__m256i __A) { 1054349cc55cSDimitry Andric return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf); 1055349cc55cSDimitry Andric } 1056349cc55cSDimitry Andric 1057349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1058349cc55cSDimitry Andric _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) { 1059349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1060349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W); 1061349cc55cSDimitry Andric } 1062349cc55cSDimitry Andric 1063349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1064349cc55cSDimitry Andric _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) { 1065349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1066349cc55cSDimitry Andric (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph()); 1067349cc55cSDimitry Andric } 1068349cc55cSDimitry Andric 1069349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) { 1070349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2dq128_mask( 1071349cc55cSDimitry Andric (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); 1072349cc55cSDimitry Andric } 1073349cc55cSDimitry Andric 1074349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1075349cc55cSDimitry Andric _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { 1076349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W, 1077349cc55cSDimitry Andric (__mmask8)__U); 1078349cc55cSDimitry Andric } 1079349cc55cSDimitry Andric 1080349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1081349cc55cSDimitry Andric _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { 1082349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2dq128_mask( 1083349cc55cSDimitry Andric (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); 1084349cc55cSDimitry Andric } 1085349cc55cSDimitry Andric 1086349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1087349cc55cSDimitry Andric _mm256_cvttph_epi32(__m128h __A) { 1088349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2dq256_mask( 1089349cc55cSDimitry Andric (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); 1090349cc55cSDimitry Andric } 1091349cc55cSDimitry Andric 1092349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1093349cc55cSDimitry Andric _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { 1094349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W, 1095349cc55cSDimitry Andric (__mmask8)__U); 1096349cc55cSDimitry Andric } 1097349cc55cSDimitry Andric 1098349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1099349cc55cSDimitry Andric _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { 1100349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2dq256_mask( 1101349cc55cSDimitry Andric (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); 1102349cc55cSDimitry Andric } 1103349cc55cSDimitry Andric 1104349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) { 1105349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2udq128_mask( 1106349cc55cSDimitry Andric (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); 1107349cc55cSDimitry Andric } 1108349cc55cSDimitry Andric 1109349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1110349cc55cSDimitry Andric _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { 1111349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W, 1112349cc55cSDimitry Andric (__mmask8)__U); 1113349cc55cSDimitry Andric } 1114349cc55cSDimitry Andric 1115349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1116349cc55cSDimitry Andric _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { 1117349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2udq128_mask( 1118349cc55cSDimitry Andric (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); 1119349cc55cSDimitry Andric } 1120349cc55cSDimitry Andric 1121349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1122349cc55cSDimitry Andric _mm256_cvttph_epu32(__m128h __A) { 1123349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2udq256_mask( 1124349cc55cSDimitry Andric (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); 1125349cc55cSDimitry Andric } 1126349cc55cSDimitry Andric 1127349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1128349cc55cSDimitry Andric _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { 1129349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W, 1130349cc55cSDimitry Andric (__mmask8)__U); 1131349cc55cSDimitry Andric } 1132349cc55cSDimitry Andric 1133349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1134349cc55cSDimitry Andric _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { 1135349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2udq256_mask( 1136349cc55cSDimitry Andric (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); 1137349cc55cSDimitry Andric } 1138349cc55cSDimitry Andric 1139349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) { 1140349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( 1141349cc55cSDimitry Andric (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1142349cc55cSDimitry Andric } 1143349cc55cSDimitry Andric 1144349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1145349cc55cSDimitry Andric _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) { 1146349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W, 1147349cc55cSDimitry Andric (__mmask8)__U); 1148349cc55cSDimitry Andric } 1149349cc55cSDimitry Andric 1150349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1151349cc55cSDimitry Andric _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) { 1152349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( 1153349cc55cSDimitry Andric (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1154349cc55cSDimitry Andric } 1155349cc55cSDimitry Andric 1156349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1157349cc55cSDimitry Andric _mm256_cvtepi64_ph(__m256i __A) { 1158349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( 1159349cc55cSDimitry Andric (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1160349cc55cSDimitry Andric } 1161349cc55cSDimitry Andric 1162349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1163349cc55cSDimitry Andric _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) { 1164349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W, 1165349cc55cSDimitry Andric (__mmask8)__U); 1166349cc55cSDimitry Andric } 1167349cc55cSDimitry Andric 1168349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1169349cc55cSDimitry Andric _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) { 1170349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( 1171349cc55cSDimitry Andric (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1172349cc55cSDimitry Andric } 1173349cc55cSDimitry Andric 1174349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) { 1175349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2qq128_mask( 1176349cc55cSDimitry Andric (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); 1177349cc55cSDimitry Andric } 1178349cc55cSDimitry Andric 1179349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1180349cc55cSDimitry Andric _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { 1181349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W, 1182349cc55cSDimitry Andric (__mmask8)__U); 1183349cc55cSDimitry Andric } 1184349cc55cSDimitry Andric 1185349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1186349cc55cSDimitry Andric _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { 1187349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2qq128_mask( 1188349cc55cSDimitry Andric (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); 1189349cc55cSDimitry Andric } 1190349cc55cSDimitry Andric 1191349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1192349cc55cSDimitry Andric _mm256_cvtph_epi64(__m128h __A) { 1193349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2qq256_mask( 1194349cc55cSDimitry Andric (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); 1195349cc55cSDimitry Andric } 1196349cc55cSDimitry Andric 1197349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1198349cc55cSDimitry Andric _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { 1199349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W, 1200349cc55cSDimitry Andric (__mmask8)__U); 1201349cc55cSDimitry Andric } 1202349cc55cSDimitry Andric 1203349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1204349cc55cSDimitry Andric _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { 1205349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2qq256_mask( 1206349cc55cSDimitry Andric (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); 1207349cc55cSDimitry Andric } 1208349cc55cSDimitry Andric 1209349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) { 1210349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( 1211349cc55cSDimitry Andric (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1212349cc55cSDimitry Andric } 1213349cc55cSDimitry Andric 1214349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1215349cc55cSDimitry Andric _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) { 1216349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W, 1217349cc55cSDimitry Andric (__mmask8)__U); 1218349cc55cSDimitry Andric } 1219349cc55cSDimitry Andric 1220349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1221349cc55cSDimitry Andric _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) { 1222349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( 1223349cc55cSDimitry Andric (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1224349cc55cSDimitry Andric } 1225349cc55cSDimitry Andric 1226349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1227349cc55cSDimitry Andric _mm256_cvtepu64_ph(__m256i __A) { 1228349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( 1229349cc55cSDimitry Andric (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1230349cc55cSDimitry Andric } 1231349cc55cSDimitry Andric 1232349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1233349cc55cSDimitry Andric _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) { 1234349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W, 1235349cc55cSDimitry Andric (__mmask8)__U); 1236349cc55cSDimitry Andric } 1237349cc55cSDimitry Andric 1238349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1239349cc55cSDimitry Andric _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) { 1240349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( 1241349cc55cSDimitry Andric (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1242349cc55cSDimitry Andric } 1243349cc55cSDimitry Andric 1244349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) { 1245349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( 1246349cc55cSDimitry Andric (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); 1247349cc55cSDimitry Andric } 1248349cc55cSDimitry Andric 1249349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1250349cc55cSDimitry Andric _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { 1251349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W, 1252349cc55cSDimitry Andric (__mmask8)__U); 1253349cc55cSDimitry Andric } 1254349cc55cSDimitry Andric 1255349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1256349cc55cSDimitry Andric _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { 1257349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( 1258349cc55cSDimitry Andric (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); 1259349cc55cSDimitry Andric } 1260349cc55cSDimitry Andric 1261349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1262349cc55cSDimitry Andric _mm256_cvtph_epu64(__m128h __A) { 1263349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( 1264349cc55cSDimitry Andric (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); 1265349cc55cSDimitry Andric } 1266349cc55cSDimitry Andric 1267349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1268349cc55cSDimitry Andric _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { 1269349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W, 1270349cc55cSDimitry Andric (__mmask8)__U); 1271349cc55cSDimitry Andric } 1272349cc55cSDimitry Andric 1273349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1274349cc55cSDimitry Andric _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { 1275349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( 1276349cc55cSDimitry Andric (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); 1277349cc55cSDimitry Andric } 1278349cc55cSDimitry Andric 1279349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) { 1280349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2qq128_mask( 1281349cc55cSDimitry Andric (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); 1282349cc55cSDimitry Andric } 1283349cc55cSDimitry Andric 1284349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1285349cc55cSDimitry Andric _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { 1286349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W, 1287349cc55cSDimitry Andric (__mmask8)__U); 1288349cc55cSDimitry Andric } 1289349cc55cSDimitry Andric 1290349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1291349cc55cSDimitry Andric _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { 1292349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2qq128_mask( 1293349cc55cSDimitry Andric (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); 1294349cc55cSDimitry Andric } 1295349cc55cSDimitry Andric 1296349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1297349cc55cSDimitry Andric _mm256_cvttph_epi64(__m128h __A) { 1298349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2qq256_mask( 1299349cc55cSDimitry Andric (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); 1300349cc55cSDimitry Andric } 1301349cc55cSDimitry Andric 1302349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1303349cc55cSDimitry Andric _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { 1304349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W, 1305349cc55cSDimitry Andric (__mmask8)__U); 1306349cc55cSDimitry Andric } 1307349cc55cSDimitry Andric 1308349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1309349cc55cSDimitry Andric _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { 1310349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2qq256_mask( 1311349cc55cSDimitry Andric (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); 1312349cc55cSDimitry Andric } 1313349cc55cSDimitry Andric 1314349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) { 1315349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( 1316349cc55cSDimitry Andric (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); 1317349cc55cSDimitry Andric } 1318349cc55cSDimitry Andric 1319349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1320349cc55cSDimitry Andric _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { 1321349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W, 1322349cc55cSDimitry Andric (__mmask8)__U); 1323349cc55cSDimitry Andric } 1324349cc55cSDimitry Andric 1325349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1326349cc55cSDimitry Andric _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { 1327349cc55cSDimitry Andric return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( 1328349cc55cSDimitry Andric (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); 1329349cc55cSDimitry Andric } 1330349cc55cSDimitry Andric 1331349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1332349cc55cSDimitry Andric _mm256_cvttph_epu64(__m128h __A) { 1333349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( 1334349cc55cSDimitry Andric (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); 1335349cc55cSDimitry Andric } 1336349cc55cSDimitry Andric 1337349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1338349cc55cSDimitry Andric _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { 1339349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W, 1340349cc55cSDimitry Andric (__mmask8)__U); 1341349cc55cSDimitry Andric } 1342349cc55cSDimitry Andric 1343349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1344349cc55cSDimitry Andric _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { 1345349cc55cSDimitry Andric return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( 1346349cc55cSDimitry Andric (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); 1347349cc55cSDimitry Andric } 1348349cc55cSDimitry Andric 1349349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) { 1350349cc55cSDimitry Andric return (__m128)__builtin_ia32_vcvtph2psx128_mask( 1351349cc55cSDimitry Andric (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1); 1352349cc55cSDimitry Andric } 1353349cc55cSDimitry Andric 1354349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W, 1355349cc55cSDimitry Andric __mmask8 __U, 1356349cc55cSDimitry Andric __m128h __A) { 1357349cc55cSDimitry Andric return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W, 1358349cc55cSDimitry Andric (__mmask8)__U); 1359349cc55cSDimitry Andric } 1360349cc55cSDimitry Andric 1361349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1362349cc55cSDimitry Andric _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { 1363349cc55cSDimitry Andric return (__m128)__builtin_ia32_vcvtph2psx128_mask( 1364349cc55cSDimitry Andric (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); 1365349cc55cSDimitry Andric } 1366349cc55cSDimitry Andric 1367349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) { 1368349cc55cSDimitry Andric return (__m256)__builtin_ia32_vcvtph2psx256_mask( 1369349cc55cSDimitry Andric (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1); 1370349cc55cSDimitry Andric } 1371349cc55cSDimitry Andric 1372349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 1373349cc55cSDimitry Andric _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) { 1374349cc55cSDimitry Andric return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W, 1375349cc55cSDimitry Andric (__mmask8)__U); 1376349cc55cSDimitry Andric } 1377349cc55cSDimitry Andric 1378349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 1379349cc55cSDimitry Andric _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { 1380349cc55cSDimitry Andric return (__m256)__builtin_ia32_vcvtph2psx256_mask( 1381349cc55cSDimitry Andric (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); 1382349cc55cSDimitry Andric } 1383349cc55cSDimitry Andric 1384349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) { 1385349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx128_mask( 1386349cc55cSDimitry Andric (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1387349cc55cSDimitry Andric } 1388349cc55cSDimitry Andric 1389349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W, 1390349cc55cSDimitry Andric __mmask8 __U, 1391349cc55cSDimitry Andric __m128 __A) { 1392349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W, 1393349cc55cSDimitry Andric (__mmask8)__U); 1394349cc55cSDimitry Andric } 1395349cc55cSDimitry Andric 1396349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1397349cc55cSDimitry Andric _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) { 1398349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx128_mask( 1399349cc55cSDimitry Andric (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1400349cc55cSDimitry Andric } 1401349cc55cSDimitry Andric 1402349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) { 1403349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx256_mask( 1404349cc55cSDimitry Andric (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); 1405349cc55cSDimitry Andric } 1406349cc55cSDimitry Andric 1407349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1408349cc55cSDimitry Andric _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) { 1409349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W, 1410349cc55cSDimitry Andric (__mmask8)__U); 1411349cc55cSDimitry Andric } 1412349cc55cSDimitry Andric 1413349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 1414349cc55cSDimitry Andric _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { 1415349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vcvtps2phx256_mask( 1416349cc55cSDimitry Andric (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 1417349cc55cSDimitry Andric } 1418349cc55cSDimitry Andric 1419349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, 1420349cc55cSDimitry Andric __m128h __B, 1421349cc55cSDimitry Andric __m128h __C) { 1422349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, 1423349cc55cSDimitry Andric (__v8hf)__C); 1424349cc55cSDimitry Andric } 1425349cc55cSDimitry Andric 1426349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, 1427349cc55cSDimitry Andric __mmask8 __U, 1428349cc55cSDimitry Andric __m128h __B, 1429349cc55cSDimitry Andric __m128h __C) { 1430349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1431349cc55cSDimitry Andric (__mmask8)__U, 1432349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1433349cc55cSDimitry Andric (__v8hf)__A); 1434349cc55cSDimitry Andric } 1435349cc55cSDimitry Andric 1436349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1437349cc55cSDimitry Andric _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1438349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1439349cc55cSDimitry Andric (__mmask8)__U, 1440349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1441349cc55cSDimitry Andric (__v8hf)__C); 1442349cc55cSDimitry Andric } 1443349cc55cSDimitry Andric 1444349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1445349cc55cSDimitry Andric _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1446349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1447349cc55cSDimitry Andric (__mmask8)__U, 1448349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1449349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1450349cc55cSDimitry Andric } 1451349cc55cSDimitry Andric 1452349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, 1453349cc55cSDimitry Andric __m128h __B, 1454349cc55cSDimitry Andric __m128h __C) { 1455349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, 1456349cc55cSDimitry Andric -(__v8hf)__C); 1457349cc55cSDimitry Andric } 1458349cc55cSDimitry Andric 1459349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, 1460349cc55cSDimitry Andric __mmask8 __U, 1461349cc55cSDimitry Andric __m128h __B, 1462349cc55cSDimitry Andric __m128h __C) { 1463349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1464349cc55cSDimitry Andric (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1465349cc55cSDimitry Andric (__v8hf)__A); 1466349cc55cSDimitry Andric } 1467349cc55cSDimitry Andric 1468349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1469349cc55cSDimitry Andric _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1470349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1471349cc55cSDimitry Andric (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1472349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1473349cc55cSDimitry Andric } 1474349cc55cSDimitry Andric 1475349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1476349cc55cSDimitry Andric _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1477349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1478349cc55cSDimitry Andric (__mmask8)__U, 1479349cc55cSDimitry Andric __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1480349cc55cSDimitry Andric (__v8hf)__C); 1481349cc55cSDimitry Andric } 1482349cc55cSDimitry Andric 1483349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1484349cc55cSDimitry Andric _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1485349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1486349cc55cSDimitry Andric (__mmask8)__U, 1487349cc55cSDimitry Andric __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1488349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1489349cc55cSDimitry Andric } 1490349cc55cSDimitry Andric 1491349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1492349cc55cSDimitry Andric _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1493349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1494349cc55cSDimitry Andric (__mmask8)__U, 1495349cc55cSDimitry Andric __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), 1496349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1497349cc55cSDimitry Andric } 1498349cc55cSDimitry Andric 1499349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, 1500349cc55cSDimitry Andric __m256h __B, 1501349cc55cSDimitry Andric __m256h __C) { 1502349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, 1503349cc55cSDimitry Andric (__v16hf)__C); 1504349cc55cSDimitry Andric } 1505349cc55cSDimitry Andric 1506349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1507349cc55cSDimitry Andric _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1508349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1509349cc55cSDimitry Andric (__mmask16)__U, 1510349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1511349cc55cSDimitry Andric (__v16hf)__A); 1512349cc55cSDimitry Andric } 1513349cc55cSDimitry Andric 1514349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1515349cc55cSDimitry Andric _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1516349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1517349cc55cSDimitry Andric (__mmask16)__U, 1518349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1519349cc55cSDimitry Andric (__v16hf)__C); 1520349cc55cSDimitry Andric } 1521349cc55cSDimitry Andric 1522349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1523349cc55cSDimitry Andric _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1524349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1525349cc55cSDimitry Andric (__mmask16)__U, 1526349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1527349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1528349cc55cSDimitry Andric } 1529349cc55cSDimitry Andric 1530349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, 1531349cc55cSDimitry Andric __m256h __B, 1532349cc55cSDimitry Andric __m256h __C) { 1533349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, 1534349cc55cSDimitry Andric -(__v16hf)__C); 1535349cc55cSDimitry Andric } 1536349cc55cSDimitry Andric 1537349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1538349cc55cSDimitry Andric _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1539349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1540349cc55cSDimitry Andric (__mmask16)__U, 1541349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1542349cc55cSDimitry Andric (__v16hf)__A); 1543349cc55cSDimitry Andric } 1544349cc55cSDimitry Andric 1545349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1546349cc55cSDimitry Andric _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1547349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1548349cc55cSDimitry Andric (__mmask16)__U, 1549349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1550349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1551349cc55cSDimitry Andric } 1552349cc55cSDimitry Andric 1553349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1554349cc55cSDimitry Andric _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1555349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1556349cc55cSDimitry Andric (__mmask16)__U, 1557349cc55cSDimitry Andric __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1558349cc55cSDimitry Andric (__v16hf)__C); 1559349cc55cSDimitry Andric } 1560349cc55cSDimitry Andric 1561349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1562349cc55cSDimitry Andric _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1563349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1564349cc55cSDimitry Andric (__mmask16)__U, 1565349cc55cSDimitry Andric __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1566349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1567349cc55cSDimitry Andric } 1568349cc55cSDimitry Andric 1569349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1570349cc55cSDimitry Andric _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1571349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1572349cc55cSDimitry Andric (__mmask16)__U, 1573349cc55cSDimitry Andric __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1574349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1575349cc55cSDimitry Andric } 1576349cc55cSDimitry Andric 1577349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A, 1578349cc55cSDimitry Andric __m128h __B, 1579349cc55cSDimitry Andric __m128h __C) { 1580349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, 1581349cc55cSDimitry Andric (__v8hf)__C); 1582349cc55cSDimitry Andric } 1583349cc55cSDimitry Andric 1584349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1585349cc55cSDimitry Andric _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1586349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1587349cc55cSDimitry Andric (__mmask8)__U, 1588349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1589349cc55cSDimitry Andric (__v8hf)__A); 1590349cc55cSDimitry Andric } 1591349cc55cSDimitry Andric 1592349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1593349cc55cSDimitry Andric _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1594349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1595349cc55cSDimitry Andric (__mmask8)__U, 1596349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1597349cc55cSDimitry Andric (__v8hf)__C); 1598349cc55cSDimitry Andric } 1599349cc55cSDimitry Andric 1600349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1601349cc55cSDimitry Andric _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1602349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1603349cc55cSDimitry Andric (__mmask8)__U, 1604349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), 1605349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1606349cc55cSDimitry Andric } 1607349cc55cSDimitry Andric 1608349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A, 1609349cc55cSDimitry Andric __m128h __B, 1610349cc55cSDimitry Andric __m128h __C) { 1611349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, 1612349cc55cSDimitry Andric -(__v8hf)__C); 1613349cc55cSDimitry Andric } 1614349cc55cSDimitry Andric 1615349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1616349cc55cSDimitry Andric _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1617349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1618349cc55cSDimitry Andric (__mmask8)__U, 1619349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), 1620349cc55cSDimitry Andric (__v8hf)__A); 1621349cc55cSDimitry Andric } 1622349cc55cSDimitry Andric 1623349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1624349cc55cSDimitry Andric _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1625349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1626349cc55cSDimitry Andric (__mmask8)__U, 1627349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), 1628349cc55cSDimitry Andric (__v8hf)_mm_setzero_ph()); 1629349cc55cSDimitry Andric } 1630349cc55cSDimitry Andric 1631349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1632349cc55cSDimitry Andric _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) { 1633349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, 1634349cc55cSDimitry Andric (__v16hf)__C); 1635349cc55cSDimitry Andric } 1636349cc55cSDimitry Andric 1637349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1638349cc55cSDimitry Andric _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1639349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1640349cc55cSDimitry Andric (__mmask16)__U, 1641349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1642349cc55cSDimitry Andric (__v16hf)__A); 1643349cc55cSDimitry Andric } 1644349cc55cSDimitry Andric 1645349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1646349cc55cSDimitry Andric _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1647349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1648349cc55cSDimitry Andric (__mmask16)__U, 1649349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1650349cc55cSDimitry Andric (__v16hf)__C); 1651349cc55cSDimitry Andric } 1652349cc55cSDimitry Andric 1653349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1654349cc55cSDimitry Andric _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1655349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1656349cc55cSDimitry Andric (__mmask16)__U, 1657349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), 1658349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1659349cc55cSDimitry Andric } 1660349cc55cSDimitry Andric 1661349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1662349cc55cSDimitry Andric _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) { 1663349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, 1664349cc55cSDimitry Andric -(__v16hf)__C); 1665349cc55cSDimitry Andric } 1666349cc55cSDimitry Andric 1667349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1668349cc55cSDimitry Andric _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1669349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1670349cc55cSDimitry Andric (__mmask16)__U, 1671349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1672349cc55cSDimitry Andric (__v16hf)__A); 1673349cc55cSDimitry Andric } 1674349cc55cSDimitry Andric 1675349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1676349cc55cSDimitry Andric _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { 1677349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1678349cc55cSDimitry Andric (__mmask16)__U, 1679349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1680349cc55cSDimitry Andric (__v16hf)_mm256_setzero_ph()); 1681349cc55cSDimitry Andric } 1682349cc55cSDimitry Andric 1683349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1684349cc55cSDimitry Andric _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1685349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1686349cc55cSDimitry Andric (__mmask8)__U, 1687349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), 1688349cc55cSDimitry Andric (__v8hf)__C); 1689349cc55cSDimitry Andric } 1690349cc55cSDimitry Andric 1691349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1692349cc55cSDimitry Andric _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1693349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1694349cc55cSDimitry Andric (__mmask16)__U, 1695349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1696349cc55cSDimitry Andric (__v16hf)__C); 1697349cc55cSDimitry Andric } 1698349cc55cSDimitry Andric 1699349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1700349cc55cSDimitry Andric _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1701349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1702349cc55cSDimitry Andric (__mmask8)__U, 1703349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), 1704349cc55cSDimitry Andric (__v8hf)__C); 1705349cc55cSDimitry Andric } 1706349cc55cSDimitry Andric 1707349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1708349cc55cSDimitry Andric _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1709349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1710349cc55cSDimitry Andric (__mmask16)__U, 1711349cc55cSDimitry Andric __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), 1712349cc55cSDimitry Andric (__v16hf)__C); 1713349cc55cSDimitry Andric } 1714349cc55cSDimitry Andric 1715349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, 1716349cc55cSDimitry Andric __m128h __B, 1717349cc55cSDimitry Andric __m128h __C) { 1718349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, 1719349cc55cSDimitry Andric (__v8hf)__C); 1720349cc55cSDimitry Andric } 1721349cc55cSDimitry Andric 1722349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1723349cc55cSDimitry Andric _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1724349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1725349cc55cSDimitry Andric (__mmask8)__U, 1726349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), 1727349cc55cSDimitry Andric (__v8hf)__A); 1728349cc55cSDimitry Andric } 1729349cc55cSDimitry Andric 1730349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, 1731349cc55cSDimitry Andric __m256h __B, 1732349cc55cSDimitry Andric __m256h __C) { 1733349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, 1734349cc55cSDimitry Andric (__v16hf)__C); 1735349cc55cSDimitry Andric } 1736349cc55cSDimitry Andric 1737349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1738349cc55cSDimitry Andric _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1739349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1740349cc55cSDimitry Andric (__mmask16)__U, 1741349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), 1742349cc55cSDimitry Andric (__v16hf)__A); 1743349cc55cSDimitry Andric } 1744349cc55cSDimitry Andric 1745349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, 1746349cc55cSDimitry Andric __m128h __B, 1747349cc55cSDimitry Andric __m128h __C) { 1748349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, 1749349cc55cSDimitry Andric -(__v8hf)__C); 1750349cc55cSDimitry Andric } 1751349cc55cSDimitry Andric 1752349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1753349cc55cSDimitry Andric _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1754349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1755349cc55cSDimitry Andric (__mmask8)__U, 1756349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), 1757349cc55cSDimitry Andric (__v8hf)__A); 1758349cc55cSDimitry Andric } 1759349cc55cSDimitry Andric 1760349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1761349cc55cSDimitry Andric _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1762349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128( 1763349cc55cSDimitry Andric (__mmask8)__U, 1764349cc55cSDimitry Andric __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), 1765349cc55cSDimitry Andric (__v8hf)__C); 1766349cc55cSDimitry Andric } 1767349cc55cSDimitry Andric 1768349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, 1769349cc55cSDimitry Andric __m256h __B, 1770349cc55cSDimitry Andric __m256h __C) { 1771349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, 1772349cc55cSDimitry Andric -(__v16hf)__C); 1773349cc55cSDimitry Andric } 1774349cc55cSDimitry Andric 1775349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1776349cc55cSDimitry Andric _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { 1777349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1778349cc55cSDimitry Andric (__mmask16)__U, 1779349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), 1780349cc55cSDimitry Andric (__v16hf)__A); 1781349cc55cSDimitry Andric } 1782349cc55cSDimitry Andric 1783349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1784349cc55cSDimitry Andric _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { 1785349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256( 1786349cc55cSDimitry Andric (__mmask16)__U, 1787349cc55cSDimitry Andric __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), 1788349cc55cSDimitry Andric (__v16hf)__C); 1789349cc55cSDimitry Andric } 1790349cc55cSDimitry Andric 1791349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A, 1792349cc55cSDimitry Andric __m128h __B) { 1793349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmulcph128_mask( 1794349cc55cSDimitry Andric (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1); 1795349cc55cSDimitry Andric } 1796349cc55cSDimitry Andric 1797349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1798349cc55cSDimitry Andric _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { 1799349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B, 1800349cc55cSDimitry Andric (__v4sf)__W, (__mmask8)__U); 1801349cc55cSDimitry Andric } 1802349cc55cSDimitry Andric 1803349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1804349cc55cSDimitry Andric _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { 1805349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmulcph128_mask( 1806349cc55cSDimitry Andric (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); 1807349cc55cSDimitry Andric } 1808349cc55cSDimitry Andric 1809349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A, 1810349cc55cSDimitry Andric __m256h __B) { 1811349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmulcph256_mask( 1812349cc55cSDimitry Andric (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); 1813349cc55cSDimitry Andric } 1814349cc55cSDimitry Andric 1815349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1816349cc55cSDimitry Andric _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { 1817349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B, 1818349cc55cSDimitry Andric (__v8sf)__W, (__mmask8)__U); 1819349cc55cSDimitry Andric } 1820349cc55cSDimitry Andric 1821349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1822349cc55cSDimitry Andric _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { 1823349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmulcph256_mask( 1824349cc55cSDimitry Andric (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); 1825349cc55cSDimitry Andric } 1826349cc55cSDimitry Andric 1827349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A, 1828349cc55cSDimitry Andric __m128h __B, 1829349cc55cSDimitry Andric __m128h __C) { 1830349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, 1831349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)-1); 1832349cc55cSDimitry Andric } 1833349cc55cSDimitry Andric 1834349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1835349cc55cSDimitry Andric _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1836349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectps_128( 1837349cc55cSDimitry Andric __U, 1838349cc55cSDimitry Andric __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B, 1839349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)__U), 1840349cc55cSDimitry Andric (__v4sf)__A); 1841349cc55cSDimitry Andric } 1842349cc55cSDimitry Andric 1843349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1844349cc55cSDimitry Andric _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1845349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, 1846349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)__U); 1847349cc55cSDimitry Andric } 1848349cc55cSDimitry Andric 1849349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1850349cc55cSDimitry Andric _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1851349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfcmaddcph128_maskz( 1852349cc55cSDimitry Andric (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); 1853349cc55cSDimitry Andric } 1854349cc55cSDimitry Andric 1855349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A, 1856349cc55cSDimitry Andric __m256h __B, 1857349cc55cSDimitry Andric __m256h __C) { 1858349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, 1859349cc55cSDimitry Andric (__v8sf)__C, (__mmask8)-1); 1860349cc55cSDimitry Andric } 1861349cc55cSDimitry Andric 1862349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1863349cc55cSDimitry Andric _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { 1864349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectps_256( 1865349cc55cSDimitry Andric __U, 1866349cc55cSDimitry Andric __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, 1867349cc55cSDimitry Andric (__mmask8)__U), 1868349cc55cSDimitry Andric (__v8sf)__A); 1869349cc55cSDimitry Andric } 1870349cc55cSDimitry Andric 1871349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1872349cc55cSDimitry Andric _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { 1873349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, 1874349cc55cSDimitry Andric (__v8sf)__C, (__mmask8)__U); 1875349cc55cSDimitry Andric } 1876349cc55cSDimitry Andric 1877349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1878349cc55cSDimitry Andric _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { 1879349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfcmaddcph256_maskz( 1880349cc55cSDimitry Andric (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); 1881349cc55cSDimitry Andric } 1882349cc55cSDimitry Andric 1883349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A, 1884349cc55cSDimitry Andric __m128h __B) { 1885349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmulcph128_mask( 1886349cc55cSDimitry Andric (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1); 1887349cc55cSDimitry Andric } 1888349cc55cSDimitry Andric 1889349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W, 1890349cc55cSDimitry Andric __mmask8 __U, 1891349cc55cSDimitry Andric __m128h __A, 1892349cc55cSDimitry Andric __m128h __B) { 1893349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B, 1894349cc55cSDimitry Andric (__v4sf)__W, (__mmask8)__U); 1895349cc55cSDimitry Andric } 1896349cc55cSDimitry Andric 1897349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1898349cc55cSDimitry Andric _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { 1899349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmulcph128_mask( 1900349cc55cSDimitry Andric (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); 1901349cc55cSDimitry Andric } 1902349cc55cSDimitry Andric 1903349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A, 1904349cc55cSDimitry Andric __m256h __B) { 1905349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmulcph256_mask( 1906349cc55cSDimitry Andric (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); 1907349cc55cSDimitry Andric } 1908349cc55cSDimitry Andric 1909349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1910349cc55cSDimitry Andric _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { 1911349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B, 1912349cc55cSDimitry Andric (__v8sf)__W, (__mmask8)__U); 1913349cc55cSDimitry Andric } 1914349cc55cSDimitry Andric 1915349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1916349cc55cSDimitry Andric _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { 1917349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmulcph256_mask( 1918349cc55cSDimitry Andric (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); 1919349cc55cSDimitry Andric } 1920349cc55cSDimitry Andric 1921349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A, 1922349cc55cSDimitry Andric __m128h __B, 1923349cc55cSDimitry Andric __m128h __C) { 1924349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, 1925349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)-1); 1926349cc55cSDimitry Andric } 1927349cc55cSDimitry Andric 1928349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1929349cc55cSDimitry Andric _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { 1930349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectps_128( 1931349cc55cSDimitry Andric __U, 1932349cc55cSDimitry Andric __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, 1933349cc55cSDimitry Andric (__mmask8)__U), 1934349cc55cSDimitry Andric (__v4sf)__A); 1935349cc55cSDimitry Andric } 1936349cc55cSDimitry Andric 1937349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1938349cc55cSDimitry Andric _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { 1939349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, 1940349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)__U); 1941349cc55cSDimitry Andric } 1942349cc55cSDimitry Andric 1943349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1944349cc55cSDimitry Andric _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { 1945349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B, 1946349cc55cSDimitry Andric (__v4sf)__C, (__mmask8)__U); 1947349cc55cSDimitry Andric } 1948349cc55cSDimitry Andric 1949349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A, 1950349cc55cSDimitry Andric __m256h __B, 1951349cc55cSDimitry Andric __m256h __C) { 1952349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, 1953349cc55cSDimitry Andric (__v8sf)__C, (__mmask8)-1); 1954349cc55cSDimitry Andric } 1955349cc55cSDimitry Andric 1956349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1957349cc55cSDimitry Andric _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { 1958349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectps_256( 1959349cc55cSDimitry Andric __U, 1960349cc55cSDimitry Andric __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, 1961349cc55cSDimitry Andric (__mmask8)__U), 1962349cc55cSDimitry Andric (__v8sf)__A); 1963349cc55cSDimitry Andric } 1964349cc55cSDimitry Andric 1965349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1966349cc55cSDimitry Andric _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { 1967349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, 1968349cc55cSDimitry Andric (__v8sf)__C, (__mmask8)__U); 1969349cc55cSDimitry Andric } 1970349cc55cSDimitry Andric 1971349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1972349cc55cSDimitry Andric _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { 1973349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B, 1974349cc55cSDimitry Andric (__v8sf)__C, (__mmask8)__U); 1975349cc55cSDimitry Andric } 1976349cc55cSDimitry Andric 1977349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, 1978349cc55cSDimitry Andric __m128h __A, 1979349cc55cSDimitry Andric __m128h __W) { 1980349cc55cSDimitry Andric return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, 1981349cc55cSDimitry Andric (__v8hf)__A); 1982349cc55cSDimitry Andric } 1983349cc55cSDimitry Andric 1984349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1985349cc55cSDimitry Andric _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { 1986349cc55cSDimitry Andric return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, 1987349cc55cSDimitry Andric (__v16hf)__A); 1988349cc55cSDimitry Andric } 1989349cc55cSDimitry Andric 1990349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 1991349cc55cSDimitry Andric _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) { 1992349cc55cSDimitry Andric return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, 1993349cc55cSDimitry Andric (__v8hi)__B); 1994349cc55cSDimitry Andric } 1995349cc55cSDimitry Andric 1996349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 1997349cc55cSDimitry Andric _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) { 1998349cc55cSDimitry Andric return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, 1999349cc55cSDimitry Andric (__v16hi)__B); 2000349cc55cSDimitry Andric } 2001349cc55cSDimitry Andric 2002349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 2003349cc55cSDimitry Andric _mm_permutexvar_ph(__m128i __A, __m128h __B) { 2004349cc55cSDimitry Andric return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); 2005349cc55cSDimitry Andric } 2006349cc55cSDimitry Andric 2007349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 2008349cc55cSDimitry Andric _mm256_permutexvar_ph(__m256i __A, __m256h __B) { 2009349cc55cSDimitry Andric return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); 2010349cc55cSDimitry Andric } 2011349cc55cSDimitry Andric 2012349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 2013349cc55cSDimitry Andric _mm256_reduce_add_ph(__m256h __W) { 2014349cc55cSDimitry Andric return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W); 2015349cc55cSDimitry Andric } 2016349cc55cSDimitry Andric 2017349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 2018349cc55cSDimitry Andric _mm256_reduce_mul_ph(__m256h __W) { 2019349cc55cSDimitry Andric return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W); 2020349cc55cSDimitry Andric } 2021349cc55cSDimitry Andric 2022349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 2023349cc55cSDimitry Andric _mm256_reduce_max_ph(__m256h __V) { 2024349cc55cSDimitry Andric return __builtin_ia32_reduce_fmax_ph256(__V); 2025349cc55cSDimitry Andric } 2026349cc55cSDimitry Andric 2027349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 2028349cc55cSDimitry Andric _mm256_reduce_min_ph(__m256h __V) { 2029349cc55cSDimitry Andric return __builtin_ia32_reduce_fmin_ph256(__V); 2030349cc55cSDimitry Andric } 2031349cc55cSDimitry Andric 2032349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 2033349cc55cSDimitry Andric _mm_reduce_add_ph(__m128h __W) { 2034349cc55cSDimitry Andric return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W); 2035349cc55cSDimitry Andric } 2036349cc55cSDimitry Andric 2037349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 2038349cc55cSDimitry Andric _mm_reduce_mul_ph(__m128h __W) { 2039349cc55cSDimitry Andric return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W); 2040349cc55cSDimitry Andric } 2041349cc55cSDimitry Andric 2042349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 2043349cc55cSDimitry Andric _mm_reduce_max_ph(__m128h __V) { 2044349cc55cSDimitry Andric return __builtin_ia32_reduce_fmax_ph128(__V); 2045349cc55cSDimitry Andric } 2046349cc55cSDimitry Andric 2047349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 2048349cc55cSDimitry Andric _mm_reduce_min_ph(__m128h __V) { 2049349cc55cSDimitry Andric return __builtin_ia32_reduce_fmin_ph128(__V); 2050349cc55cSDimitry Andric } 2051349cc55cSDimitry Andric 2052349cc55cSDimitry Andric // intrinsics below are alias for f*mul_*ch 2053349cc55cSDimitry Andric #define _mm_mul_pch(A, B) _mm_fmul_pch(A, B) 2054349cc55cSDimitry Andric #define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B) 2055349cc55cSDimitry Andric #define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B) 2056349cc55cSDimitry Andric #define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B) 2057349cc55cSDimitry Andric #define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B) 2058349cc55cSDimitry Andric #define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B) 2059349cc55cSDimitry Andric 2060349cc55cSDimitry Andric #define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B) 2061349cc55cSDimitry Andric #define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B) 2062349cc55cSDimitry Andric #define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B) 2063349cc55cSDimitry Andric #define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B) 2064349cc55cSDimitry Andric #define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B) 2065349cc55cSDimitry Andric #define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B) 2066349cc55cSDimitry Andric 2067349cc55cSDimitry Andric #undef __DEFAULT_FN_ATTRS128 2068349cc55cSDimitry Andric #undef __DEFAULT_FN_ATTRS256 2069349cc55cSDimitry Andric 2070349cc55cSDimitry Andric #endif 2071bdd1243dSDimitry Andric #endif 2072