// -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP_EXPERIMENTAL_SIMD #define _LIBCPP_EXPERIMENTAL_SIMD /* experimental/simd synopsis namespace std::experimental { inline namespace parallelism_v2 { namespace simd_abi { struct scalar {}; template struct fixed_size {}; template inline constexpr int max_fixed_size = implementation-defined; template using compatible = implementation-defined; template using native = implementation-defined; } // simd_abi struct element_aligned_tag {}; struct vector_aligned_tag {}; template struct overaligned_tag {}; inline constexpr element_aligned_tag element_aligned{}; inline constexpr vector_aligned_tag vector_aligned{}; template inline constexpr overaligned_tag overaligned{}; // traits [simd.traits] template struct is_abi_tag; template inline constexpr bool is_abi_tag_v = is_abi_tag::value; template struct is_simd; template inline constexpr bool is_simd_v = is_simd::value; template struct is_simd_mask; template inline constexpr bool is_simd_mask_v = is_simd_mask::value; template struct is_simd_flag_type; template inline constexpr bool is_simd_flag_type_v = is_simd_flag_type::value; template struct abi_for_size { using type = see below; }; template using abi_for_size_t = typename abi_for_size::type; template > struct simd_size; template > inline constexpr size_t simd_size_v = simd_size::value; template struct memory_alignment; template inline constexpr size_t memory_alignment_v = memory_alignment::value; // class template simd [simd.class] template > class simd; template using native_simd = simd>; template using fixed_size_simd = simd>; // class template simd_mask [simd.mask.class] template > 
class simd_mask; template using native_simd_mask = simd_mask>; template using fixed_size_simd_mask = simd_mask>; // casts [simd.casts] template see below simd_cast(const simd&); template see below static_simd_cast(const simd&); template fixed_size_simd> to_fixed_size(const simd&) noexcept; template fixed_size_simd_mask> to_fixed_size(const simd_mask&) noexcept; template native_simd to_native(const fixed_size_simd&) noexcept; template native_simd_mask to_native(const fixed_size_simd_mask> &) noexcept; template simd to_compatible(const fixed_size_simd&) noexcept; template simd_mask to_compatible(const fixed_size_simd_mask&) noexcept; template tuple>...> split(const simd&); template tuple>...> split(const simd_mask&); template array / V::size()> split( const simd&); template array / V::size()> split( const simd_mask&); template simd + ...)>> concat(const simd&...); template simd_mask + ...)>> concat(const simd_mask&...); // reductions [simd.mask.reductions] template bool all_of(const simd_mask&) noexcept; template bool any_of(const simd_mask&) noexcept; template bool none_of(const simd_mask&) noexcept; template bool some_of(const simd_mask&) noexcept; template int popcount(const simd_mask&) noexcept; template int find_first_set(const simd_mask&); template int find_last_set(const simd_mask&); bool all_of(see below) noexcept; bool any_of(see below) noexcept; bool none_of(see below) noexcept; bool some_of(see below) noexcept; int popcount(see below) noexcept; int find_first_set(see below) noexcept; int find_last_set(see below) noexcept; // masked assignment [simd.whereexpr] template class const_where_expression; template class where_expression; // masked assignment [simd.mask.where] template struct nodeduce { using type = T; }; // exposition only template using nodeduce_t = typename nodeduce::type; // exposition only template where_expression, simd> where(const typename simd::mask_type&, simd&) noexcept; template const_where_expression, const simd> where(const typename 
simd::mask_type&, const simd&) noexcept; template where_expression, simd_mask> where(const nodeduce_t>&, simd_mask&) noexcept; template const_where_expression, const simd_mask> where(const nodeduce_t>&, const simd_mask&) noexcept; template where_expression where(see below k, T& d) noexcept; template const_where_expression where(see below k, const T& d) noexcept; // reductions [simd.reductions] template > T reduce(const simd&, BinaryOperation = BinaryOperation()); template typename V::value_type reduce(const const_where_expression& x, typename V::value_type neutral_element, BinaryOperation binary_op); template typename V::value_type reduce(const const_where_expression& x, plus<> binary_op = plus<>()); template typename V::value_type reduce(const const_where_expression& x, multiplies<> binary_op); template typename V::value_type reduce(const const_where_expression& x, bit_and<> binary_op); template typename V::value_type reduce(const const_where_expression& x, bit_or<> binary_op); template typename V::value_type reduce(const const_where_expression& x, bit_xor<> binary_op); template T hmin(const simd&); template T hmin(const const_where_expression&); template T hmax(const simd&); template T hmax(const const_where_expression&); // algorithms [simd.alg] template simd min(const simd&, const simd&) noexcept; template simd max(const simd&, const simd&) noexcept; template std::pair, simd> minmax(const simd&, const simd&) noexcept; template simd clamp(const simd& v, const simd& lo, const simd& hi); // [simd.whereexpr] template class const_where_expression { const M& mask; // exposition only T& data; // exposition only public: const_where_expression(const const_where_expression&) = delete; const_where_expression& operator=(const const_where_expression&) = delete; remove_const_t operator-() const &&; template void copy_to(U* mem, Flags f) const &&; }; template class where_expression : public const_where_expression { public: where_expression(const where_expression&) = delete; 
where_expression& operator=(const where_expression&) = delete; template void operator=(U&& x); template void operator+=(U&& x); template void operator-=(U&& x); template void operator*=(U&& x); template void operator/=(U&& x); template void operator%=(U&& x); template void operator&=(U&& x); template void operator|=(U&& x); template void operator^=(U&& x); template void operator<<=(U&& x); template void operator>>=(U&& x); void operator++(); void operator++(int); void operator--(); void operator--(int); template void copy_from(const U* mem, Flags); }; // [simd.class] template class simd { public: using value_type = T; using reference = see below; using mask_type = simd_mask; using abi_type = Abi; static constexpr size_t size() noexcept; simd() = default; // implicit type conversion constructor template simd(const simd>&); // implicit broadcast constructor (see below for constraints) template simd(U&& value); // generator constructor (see below for constraints) template explicit simd(G&& gen); // load constructor template simd(const U* mem, Flags f); // loads [simd.load] template void copy_from(const U* mem, Flags f); // stores [simd.store] template void copy_to(U* mem, Flags f) const; // scalar access [simd.subscr] reference operator[](size_t); value_type operator[](size_t) const; // unary operators [simd.unary] simd& operator++(); simd operator++(int); simd& operator--(); simd operator--(int); mask_type operator!() const; simd operator~() const; // see below simd operator+() const; simd operator-() const; // binary operators [simd.binary] friend simd operator+ (const simd&, const simd&); friend simd operator- (const simd&, const simd&); friend simd operator* (const simd&, const simd&); friend simd operator/ (const simd&, const simd&); friend simd operator% (const simd&, const simd&); friend simd operator& (const simd&, const simd&); friend simd operator| (const simd&, const simd&); friend simd operator^ (const simd&, const simd&); friend simd operator<<(const 
simd&, const simd&); friend simd operator>>(const simd&, const simd&); friend simd operator<<(const simd&, int); friend simd operator>>(const simd&, int); // compound assignment [simd.cassign] friend simd& operator+= (simd&, const simd&); friend simd& operator-= (simd&, const simd&); friend simd& operator*= (simd&, const simd&); friend simd& operator/= (simd&, const simd&); friend simd& operator%= (simd&, const simd&); friend simd& operator&= (simd&, const simd&); friend simd& operator|= (simd&, const simd&); friend simd& operator^= (simd&, const simd&); friend simd& operator<<=(simd&, const simd&); friend simd& operator>>=(simd&, const simd&); friend simd& operator<<=(simd&, int); friend simd& operator>>=(simd&, int); // compares [simd.comparison] friend mask_type operator==(const simd&, const simd&); friend mask_type operator!=(const simd&, const simd&); friend mask_type operator>=(const simd&, const simd&); friend mask_type operator<=(const simd&, const simd&); friend mask_type operator> (const simd&, const simd&); friend mask_type operator< (const simd&, const simd&); }; // [simd.math] template using scharv = simd; // exposition only template using shortv = simd; // exposition only template using intv = simd; // exposition only template using longv = simd; // exposition only template using llongv = simd; // exposition only template using floatv = simd; // exposition only template using doublev = simd; // exposition only template using ldoublev = simd; // exposition only template using samesize = fixed_size_simd; // exposition only template floatv acos(floatv x); template doublev acos(doublev x); template ldoublev acos(ldoublev x); template floatv asin(floatv x); template doublev asin(doublev x); template ldoublev asin(ldoublev x); template floatv atan(floatv x); template doublev atan(doublev x); template ldoublev atan(ldoublev x); template floatv atan2(floatv y, floatv x); template doublev atan2(doublev y, doublev x); template ldoublev atan2(ldoublev y, 
ldoublev x); template floatv cos(floatv x); template doublev cos(doublev x); template ldoublev cos(ldoublev x); template floatv sin(floatv x); template doublev sin(doublev x); template ldoublev sin(ldoublev x); template floatv tan(floatv x); template doublev tan(doublev x); template ldoublev tan(ldoublev x); template floatv acosh(floatv x); template doublev acosh(doublev x); template ldoublev acosh(ldoublev x); template floatv asinh(floatv x); template doublev asinh(doublev x); template ldoublev asinh(ldoublev x); template floatv atanh(floatv x); template doublev atanh(doublev x); template ldoublev atanh(ldoublev x); template floatv cosh(floatv x); template doublev cosh(doublev x); template ldoublev cosh(ldoublev x); template floatv sinh(floatv x); template doublev sinh(doublev x); template ldoublev sinh(ldoublev x); template floatv tanh(floatv x); template doublev tanh(doublev x); template ldoublev tanh(ldoublev x); template floatv exp(floatv x); template doublev exp(doublev x); template ldoublev exp(ldoublev x); template floatv exp2(floatv x); template doublev exp2(doublev x); template ldoublev exp2(ldoublev x); template floatv expm1(floatv x); template doublev expm1(doublev x); template ldoublev expm1(ldoublev x); template floatv frexp(floatv value, samesize>* exp); template doublev frexp(doublev value, samesize>* exp); template ldoublev frexp(ldoublev value, samesize>* exp); template samesize> ilogb(floatv x); template samesize> ilogb(doublev x); template samesize> ilogb(ldoublev x); template floatv ldexp(floatv x, samesize> exp); template doublev ldexp(doublev x, samesize> exp); template ldoublev ldexp(ldoublev x, samesize> exp); template floatv log(floatv x); template doublev log(doublev x); template ldoublev log(ldoublev x); template floatv log10(floatv x); template doublev log10(doublev x); template ldoublev log10(ldoublev x); template floatv log1p(floatv x); template doublev log1p(doublev x); template ldoublev log1p(ldoublev x); template floatv log2(floatv 
x); template doublev log2(doublev x); template ldoublev log2(ldoublev x); template floatv logb(floatv x); template doublev logb(doublev x); template ldoublev logb(ldoublev x); template floatv modf(floatv value, floatv* iptr); template doublev modf(doublev value, doublev* iptr); template ldoublev modf(ldoublev value, ldoublev* iptr); template floatv scalbn(floatv x, samesize> n); template doublev scalbn(doublev x, samesize> n); template ldoublev scalbn(ldoublev x, samesize> n); template floatv scalbln(floatv x, samesize> n); template doublev scalbln(doublev x, samesize> n); template ldoublev scalbln(ldoublev x, samesize> n); template floatv cbrt(floatv x); template doublev cbrt(doublev x); template ldoublev cbrt(ldoublev x); template scharv abs(scharv j); template shortv abs(shortv j); template intv abs(intv j); template longv abs(longv j); template llongv abs(llongv j); template floatv abs(floatv j); template doublev abs(doublev j); template ldoublev abs(ldoublev j); template floatv hypot(floatv x, floatv y); template doublev hypot(doublev x, doublev y); template ldoublev hypot(ldoublev x, ldoublev y); template floatv hypot(floatv x, floatv y, floatv z); template doublev hypot(doublev x, doublev y, doublev z); template ldoublev hypot(ldoublev x, ldoublev y, ldoublev z); template floatv pow(floatv x, floatv y); template doublev pow(doublev x, doublev y); template ldoublev pow(ldoublev x, ldoublev y); template floatv sqrt(floatv x); template doublev sqrt(doublev x); template ldoublev sqrt(ldoublev x); template floatv erf(floatv x); template doublev erf(doublev x); template ldoublev erf(ldoublev x); template floatv erfc(floatv x); template doublev erfc(doublev x); template ldoublev erfc(ldoublev x); template floatv lgamma(floatv x); template doublev lgamma(doublev x); template ldoublev lgamma(ldoublev x); template floatv tgamma(floatv x); template doublev tgamma(doublev x); template ldoublev tgamma(ldoublev x); template floatv ceil(floatv x); template doublev 
ceil(doublev x); template ldoublev ceil(ldoublev x); template floatv floor(floatv x); template doublev floor(doublev x); template ldoublev floor(ldoublev x); template floatv nearbyint(floatv x); template doublev nearbyint(doublev x); template ldoublev nearbyint(ldoublev x); template floatv rint(floatv x); template doublev rint(doublev x); template ldoublev rint(ldoublev x); template samesize> lrint(floatv x); template samesize> lrint(doublev x); template samesize> lrint(ldoublev x); template samesize> llrint(floatv x); template samesize> llrint(doublev x); template samesize> llrint(ldoublev x); template floatv round(floatv x); template doublev round(doublev x); template ldoublev round(ldoublev x); template samesize> lround(floatv x); template samesize> lround(doublev x); template samesize> lround(ldoublev x); template samesize> llround(floatv x); template samesize> llround(doublev x); template samesize> llround(ldoublev x); template floatv trunc(floatv x); template doublev trunc(doublev x); template ldoublev trunc(ldoublev x); template floatv fmod(floatv x, floatv y); template doublev fmod(doublev x, doublev y); template ldoublev fmod(ldoublev x, ldoublev y); template floatv remainder(floatv x, floatv y); template doublev remainder(doublev x, doublev y); template ldoublev remainder(ldoublev x, ldoublev y); template floatv remquo(floatv x, floatv y, samesize>* quo); template doublev remquo(doublev x, doublev y, samesize>* quo); template ldoublev remquo(ldoublev x, ldoublev y, samesize>* quo); template floatv copysign(floatv x, floatv y); template doublev copysign(doublev x, doublev y); template ldoublev copysign(ldoublev x, ldoublev y); template doublev nan(const char* tagp); template floatv nanf(const char* tagp); template ldoublev nanl(const char* tagp); template floatv nextafter(floatv x, floatv y); template doublev nextafter(doublev x, doublev y); template ldoublev nextafter(ldoublev x, ldoublev y); template floatv nexttoward(floatv x, ldoublev y); template 
doublev nexttoward(doublev x, ldoublev y); template ldoublev nexttoward(ldoublev x, ldoublev y); template floatv fdim(floatv x, floatv y); template doublev fdim(doublev x, doublev y); template ldoublev fdim(ldoublev x, ldoublev y); template floatv fmax(floatv x, floatv y); template doublev fmax(doublev x, doublev y); template ldoublev fmax(ldoublev x, ldoublev y); template floatv fmin(floatv x, floatv y); template doublev fmin(doublev x, doublev y); template ldoublev fmin(ldoublev x, ldoublev y); template floatv fma(floatv x, floatv y, floatv z); template doublev fma(doublev x, doublev y, doublev z); template ldoublev fma(ldoublev x, ldoublev y, ldoublev z); template samesize> fpclassify(floatv x); template samesize> fpclassify(doublev x); template samesize> fpclassify(ldoublev x); template simd_mask isfinite(floatv x); template simd_mask isfinite(doublev x); template simd_mask isfinite(ldoublev x); template simd_mask isinf(floatv x); template simd_mask isinf(doublev x); template simd_mask isinf(ldoublev x); template simd_mask isnan(floatv x); template simd_mask isnan(doublev x); template simd_mask isnan(ldoublev x); template simd_mask isnormal(floatv x); template simd_mask isnormal(doublev x); template simd_mask isnormal(ldoublev x); template simd_mask signbit(floatv x); template simd_mask signbit(doublev x); template simd_mask signbit(ldoublev x); template simd_mask isgreater(floatv x, floatv y); template simd_mask isgreater(doublev x, doublev y); template simd_mask isgreater(ldoublev x, ldoublev y); template simd_mask isgreaterequal(floatv x, floatv y); template simd_mask isgreaterequal(doublev x, doublev y); template simd_mask isgreaterequal(ldoublev x, ldoublev y); template simd_mask isless(floatv x, floatv y); template simd_mask isless(doublev x, doublev y); template simd_mask isless(ldoublev x, ldoublev y); template simd_mask islessequal(floatv x, floatv y); template simd_mask islessequal(doublev x, doublev y); template simd_mask islessequal(ldoublev x, 
ldoublev y); template simd_mask islessgreater(floatv x, floatv y); template simd_mask islessgreater(doublev x, doublev y); template simd_mask islessgreater(ldoublev x, ldoublev y); template simd_mask isunordered(floatv x, floatv y); template simd_mask isunordered(doublev x, doublev y); template simd_mask isunordered(ldoublev x, ldoublev y); template struct simd_div_t { V quot, rem; }; template simd_div_t> div(scharv numer, scharv denom); template simd_div_t> div(shortv numer, shortv denom); template simd_div_t> div(intv numer, intv denom); template simd_div_t> div(longv numer, longv denom); template simd_div_t> div(llongv numer, llongv denom); // [simd.mask.class] template class simd_mask { public: using value_type = bool; using reference = see below; using simd_type = simd; using abi_type = Abi; static constexpr size_t size() noexcept; simd_mask() = default; // broadcast constructor explicit simd_mask(value_type) noexcept; // implicit type conversion constructor template simd_mask(const simd_mask>&) noexcept; // load constructor template simd_mask(const value_type* mem, Flags); // loads [simd.mask.copy] template void copy_from(const value_type* mem, Flags); template void copy_to(value_type* mem, Flags) const; // scalar access [simd.mask.subscr] reference operator[](size_t); value_type operator[](size_t) const; // unary operators [simd.mask.unary] simd_mask operator!() const noexcept; // simd_mask binary operators [simd.mask.binary] friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator& (const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator| (const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator^ (const simd_mask&, const simd_mask&) noexcept; // simd_mask compound assignment [simd.mask.cassign] friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept; friend simd_mask& operator|=(simd_mask&, const 
simd_mask&) noexcept; friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept; // simd_mask compares [simd.mask.comparison] friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; }; } // parallelism_v2 } // std::experimental */ #include <__assert> // all public C++ headers provide the assertion handler #include <__functional/operations.h> #include #include #include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD #if _LIBCPP_STD_VER >= 17 enum class _StorageKind { _Scalar, _Array, _VecExt, }; template <_StorageKind __kind, int _Np> struct __simd_abi {}; template class __simd_storage {}; template class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> { std::array<_Tp, __num_element> __storage_; template friend struct simd; template friend struct simd_mask; public: _LIBCPP_HIDE_FROM_ABI _Tp __get(size_t __index) const noexcept { return __storage_[__index]; } _LIBCPP_HIDE_FROM_ABI void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; } }; template class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>> { _Tp __storage_; template friend struct simd; template friend struct simd_mask; public: _LIBCPP_HIDE_FROM_ABI _Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; } _LIBCPP_HIDE_FROM_ABI void __set(size_t __index, _Tp __val) noexcept { (&__storage_)[__index] = __val; } }; #ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION _LIBCPP_HIDE_FROM_ABI constexpr size_t __floor_pow_of_2(size_t __val) { return ((__val - 1) & __val) == 0 ? __val : __floor_pow_of_2((__val - 1) & __val); } _LIBCPP_HIDE_FROM_ABI constexpr size_t __ceil_pow_of_2(size_t __val) { return __val == 1 ? 
1 : __floor_pow_of_2(__val - 1) << 1; } template struct __vec_ext_traits { #if !defined(_LIBCPP_COMPILER_CLANG_BASED) typedef _Tp type __attribute__((vector_size(__ceil_pow_of_2(__bytes)))); #endif }; #if defined(_LIBCPP_COMPILER_CLANG_BASED) #define _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, _NUM_ELEMENT) \ template <> \ struct __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> { \ using type = \ _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT))); \ } #define _LIBCPP_SPECIALIZE_VEC_EXT_32(_TYPE) \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 1); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 2); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 3); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 4); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 5); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 6); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 7); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 8); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 9); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 10); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 11); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 12); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 13); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 14); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 15); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 16); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 17); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 18); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 19); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 20); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 21); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 22); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 23); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 24); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 25); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 26); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 27); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 28); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 29); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 30); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 31); \ _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 32) _LIBCPP_SPECIALIZE_VEC_EXT_32(char); _LIBCPP_SPECIALIZE_VEC_EXT_32(char16_t); _LIBCPP_SPECIALIZE_VEC_EXT_32(char32_t); _LIBCPP_SPECIALIZE_VEC_EXT_32(wchar_t); 
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed char); _LIBCPP_SPECIALIZE_VEC_EXT_32(signed short); _LIBCPP_SPECIALIZE_VEC_EXT_32(signed int); _LIBCPP_SPECIALIZE_VEC_EXT_32(signed long); _LIBCPP_SPECIALIZE_VEC_EXT_32(signed long long); _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned char); _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned short); _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned int); _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long); _LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long long); _LIBCPP_SPECIALIZE_VEC_EXT_32(float); _LIBCPP_SPECIALIZE_VEC_EXT_32(double); _LIBCPP_SPECIALIZE_VEC_EXT_32(long double); #undef _LIBCPP_SPECIALIZE_VEC_EXT_32 #undef _LIBCPP_SPECIALIZE_VEC_EXT #endif template class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> { using _StorageType = typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type; _StorageType __storage_; template friend struct simd; template friend struct simd_mask; public: _LIBCPP_HIDE_FROM_ABI _Tp __get(size_t __index) const noexcept { return __storage_[__index]; } _LIBCPP_HIDE_FROM_ABI void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; } }; #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION template class __simd_reference { static_assert(std::is_same<_Vp, _Tp>::value, ""); template friend struct simd; template friend struct simd_mask; __simd_storage<_Tp, _Abi>* __ptr_; size_t __index_; _LIBCPP_HIDE_FROM_ABI __simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index) : __ptr_(__ptr), __index_(__index) {} _LIBCPP_HIDE_FROM_ABI __simd_reference(const __simd_reference&) = default; public: __simd_reference() = delete; __simd_reference& operator=(const __simd_reference&) = delete; _LIBCPP_HIDE_FROM_ABI operator _Vp() const { return __ptr_->__get(__index_); } _LIBCPP_HIDE_FROM_ABI __simd_reference operator=(_Vp __value) && { __ptr_->__set(__index_, __value); return *this; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator++() && { return std::move(*this) = __ptr_->__get(__index_) + 1; } 
_LIBCPP_HIDE_FROM_ABI _Vp operator++(int) && { auto __val = __ptr_->__get(__index_); __ptr_->__set(__index_, __val + 1); return __val; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator--() && { return std::move(*this) = __ptr_->__get(__index_) - 1; } _LIBCPP_HIDE_FROM_ABI _Vp operator--(int) && { auto __val = __ptr_->__get(__index_); __ptr_->__set(__index_, __val - 1); return __val; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator+=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) + __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator-=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) - __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator*=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) * __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator/=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) / __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator%=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) % __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator>>=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) >> __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator<<=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) << __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator&=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) & __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator|=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) | __value; } _LIBCPP_HIDE_FROM_ABI __simd_reference operator^=(_Vp __value) && { return std::move(*this) = __ptr_->__get(__index_) ^ __value; } }; template _LIBCPP_HIDE_FROM_ABI constexpr decltype(_To{std::declval<_From>()}, true) __is_non_narrowing_convertible_impl(_From) { return true; } template _LIBCPP_HIDE_FROM_ABI constexpr bool __is_non_narrowing_convertible_impl(...) 
{ return false; } template _LIBCPP_HIDE_FROM_ABI constexpr typename std::enable_if::value && std::is_arithmetic<_From>::value, bool>::type __is_non_narrowing_arithmetic_convertible() { return experimental::__is_non_narrowing_convertible_impl<_To>(_From{}); } template _LIBCPP_HIDE_FROM_ABI constexpr typename std::enable_if::value && std::is_arithmetic<_From>::value), bool>::type __is_non_narrowing_arithmetic_convertible() { return false; } template _LIBCPP_HIDE_FROM_ABI constexpr _Tp __variadic_sum() { return _Tp{}; } template _LIBCPP_HIDE_FROM_ABI constexpr _Tp __variadic_sum(_Up __first, _Args... __rest) { return static_cast<_Tp>(__first) + experimental::__variadic_sum<_Tp>(__rest...); } template struct __nodeduce { using type = _Tp; }; template _LIBCPP_HIDE_FROM_ABI constexpr bool __vectorizable() { return std::is_arithmetic<_Tp>::value && !std::is_const<_Tp>::value && !std::is_volatile<_Tp>::value && !std::is_same<_Tp, bool>::value; } _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI using scalar = __simd_abi<_StorageKind::_Scalar, 1>; template using fixed_size = __simd_abi<_StorageKind::_Array, _Np>; template inline constexpr size_t max_fixed_size = 32; template using compatible = fixed_size<16 / sizeof(_Tp)>; #ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION template using native = __simd_abi<_StorageKind::_VecExt, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>; #else template using native = fixed_size<_Tp, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>; #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD template > class simd; template > class simd_mask; struct element_aligned_tag {}; struct vector_aligned_tag {}; template struct overaligned_tag {}; inline constexpr element_aligned_tag element_aligned{}; inline constexpr vector_aligned_tag vector_aligned{}; template inline constexpr overaligned_tag<_Np> overaligned{}; // traits [simd.traits] template 
struct is_abi_tag : std::integral_constant {}; template <_StorageKind __kind, int _Np> struct is_abi_tag<__simd_abi<__kind, _Np>> : std::integral_constant {}; template struct is_simd : std::integral_constant {}; template struct is_simd> : std::integral_constant {}; template struct is_simd_mask : std::integral_constant {}; template struct is_simd_mask> : std::integral_constant { }; template struct is_simd_flag_type : std::integral_constant {}; template <> struct is_simd_flag_type : std::integral_constant {}; template <> struct is_simd_flag_type : std::integral_constant {}; template struct is_simd_flag_type> : std::integral_constant {}; template inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value; template inline constexpr bool is_simd_v = is_simd<_Tp>::value; template inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value; template inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value; template struct abi_for_size { using type = simd_abi::fixed_size<_Np>; }; template using abi_for_size_t = typename abi_for_size<_Tp, _Np>::type; template > struct simd_size; template struct simd_size<_Tp, __simd_abi<__kind, _Np>> : std::integral_constant { static_assert( std::is_arithmetic<_Tp>::value && !std::is_same<__remove_const_t<_Tp>, bool>::value, "Element type should be vectorizable"); }; // TODO: implement it. 
template struct memory_alignment; template > inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value; template inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value; // class template simd [simd.class] template using native_simd = simd<_Tp, simd_abi::native<_Tp>>; template using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>; // class template simd_mask [simd.mask.class] template using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>; template using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>; // casts [simd.casts] template struct __static_simd_cast_traits { template static simd<_Tp, _Abi> __apply(const simd<_Up, _Abi>& __v); }; template struct __static_simd_cast_traits> { template static typename std::enable_if::size() == simd<_Tp, _NewAbi>::size(), simd<_Tp, _NewAbi>>::type __apply(const simd<_Up, _Abi>& __v); }; template struct __simd_cast_traits { template static typename std::enable_if< __is_non_narrowing_arithmetic_convertible<_Up, _Tp>(), simd<_Tp, _Abi>>::type __apply(const simd<_Up, _Abi>& __v); }; template struct __simd_cast_traits> { template static typename std::enable_if< __is_non_narrowing_arithmetic_convertible<_Up, _Tp>() && simd<_Up, _Abi>::size() == simd<_Tp, _NewAbi>::size(), simd<_Tp, _NewAbi>>::type __apply(const simd<_Up, _Abi>& __v); }; template _LIBCPP_HIDE_FROM_ABI auto simd_cast(const simd<_Up, _Abi>& __v) -> decltype(__simd_cast_traits<_Tp>::__apply(__v)) { return __simd_cast_traits<_Tp>::__apply(__v); } template _LIBCPP_HIDE_FROM_ABI auto static_simd_cast(const simd<_Up, _Abi>& __v) -> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) { return __static_simd_cast_traits<_Tp>::__apply(__v); } template fixed_size_simd<_Tp, simd_size<_Tp, _Abi>::value> to_fixed_size(const simd<_Tp, _Abi>&) noexcept; template fixed_size_simd_mask<_Tp, simd_size<_Tp, _Abi>::value> to_fixed_size(const simd_mask<_Tp, _Abi>&) noexcept; template native_simd<_Tp> to_native(const 
fixed_size_simd<_Tp, _Np>&) noexcept; template native_simd_mask<_Tp> to_native(const fixed_size_simd_mask<_Tp, _Np>&) noexcept; template simd<_Tp> to_compatible(const fixed_size_simd<_Tp, _Np>&) noexcept; template simd_mask<_Tp> to_compatible(const fixed_size_simd_mask<_Tp, _Np>&) noexcept; template tuple>...> split(const simd<_Tp, _Abi>&); template tuple>...> split(const simd_mask<_Tp, _Abi>&); template array<_SimdType, simd_size::value / _SimdType::size()> split(const simd&); template array<_SimdType, simd_size::value / _SimdType::size()> split(const simd_mask&); template simd<_Tp, abi_for_size_t<_Tp, experimental::__variadic_sum(simd_size<_Tp, _Abis>::value...)>> concat(const simd<_Tp, _Abis>&...); template simd_mask<_Tp, abi_for_size_t<_Tp, experimental::__variadic_sum(simd_size<_Tp, _Abis>::value...)>> concat(const simd_mask<_Tp, _Abis>&...); // reductions [simd.mask.reductions] template bool all_of(const simd_mask<_Tp, _Abi>&) noexcept; template bool any_of(const simd_mask<_Tp, _Abi>&) noexcept; template bool none_of(const simd_mask<_Tp, _Abi>&) noexcept; template bool some_of(const simd_mask<_Tp, _Abi>&) noexcept; template int popcount(const simd_mask<_Tp, _Abi>&) noexcept; template int find_first_set(const simd_mask<_Tp, _Abi>&); template int find_last_set(const simd_mask<_Tp, _Abi>&); bool all_of(bool) noexcept; bool any_of(bool) noexcept; bool none_of(bool) noexcept; bool some_of(bool) noexcept; int popcount(bool) noexcept; int find_first_set(bool) noexcept; int find_last_set(bool) noexcept; // masked assignment [simd.whereexpr] template class const_where_expression; template class where_expression; // masked assignment [simd.mask.where] template where_expression, simd<_Tp, _Abi>> where(const typename simd<_Tp, _Abi>::mask_type&, simd<_Tp, _Abi>&) noexcept; template const_where_expression, const simd<_Tp, _Abi>> where(const typename simd<_Tp, _Abi>::mask_type&, const simd<_Tp, _Abi>&) noexcept; template where_expression, simd_mask<_Tp, _Abi>> where(const 
typename __nodeduce>::type&, simd_mask<_Tp, _Abi>&) noexcept; template const_where_expression, const simd_mask<_Tp, _Abi>> where(const typename __nodeduce>::type&, const simd_mask<_Tp, _Abi>&) noexcept; template where_expression where(bool, _Tp&) noexcept; template const_where_expression where(bool, const _Tp&) noexcept; // reductions [simd.reductions] template > _Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp()); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, typename _SimdType::value_type __neutral_element, _BinaryOp); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, plus = {}); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, multiplies); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, bit_and); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, bit_or); template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>&, bit_xor); template _Tp hmin(const simd<_Tp, _Abi>&); template typename _SimdType::value_type hmin(const const_where_expression<_MaskType, _SimdType>&); template _Tp hmax(const simd<_Tp, _Abi>&); template typename _SimdType::value_type hmax(const const_where_expression<_MaskType, _SimdType>&); // algorithms [simd.alg] template simd<_Tp, _Abi> min(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept; template simd<_Tp, _Abi> max(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept; template std::pair, simd<_Tp, _Abi>> minmax(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept; template simd<_Tp, _Abi> clamp(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&); // [simd.whereexpr] // TODO implement where expressions. 
template class const_where_expression { public: const_where_expression(const const_where_expression&) = delete; const_where_expression& operator=(const const_where_expression&) = delete; __remove_const_t<_Tp> operator-() const&&; template void copy_to(_Up*, _Flags) const&&; }; template class where_expression : public const_where_expression<_MaskType, _Tp> { public: where_expression(const where_expression&) = delete; where_expression& operator=(const where_expression&) = delete; template void operator=(_Up&&); template void operator+=(_Up&&); template void operator-=(_Up&&); template void operator*=(_Up&&); template void operator/=(_Up&&); template void operator%=(_Up&&); template void operator&=(_Up&&); template void operator|=(_Up&&); template void operator^=(_Up&&); template void operator<<=(_Up&&); template void operator>>=(_Up&&); void operator++(); void operator++(int); void operator--(); void operator--(int); template void copy_from(const _Up*, _Flags); }; // [simd.class] // TODO: implement simd template class simd { public: using value_type = _Tp; using reference = __simd_reference<_Tp, _Tp, _Abi>; using mask_type = simd_mask<_Tp, _Abi>; using abi_type = _Abi; _LIBCPP_HIDE_FROM_ABI simd() = default; _LIBCPP_HIDE_FROM_ABI simd(const simd&) = default; _LIBCPP_HIDE_FROM_ABI simd& operator=(const simd&) = default; static _LIBCPP_HIDE_FROM_ABI constexpr size_t size() noexcept { return simd_size<_Tp, _Abi>::value; } private: __simd_storage<_Tp, _Abi> __s_; template static _LIBCPP_HIDE_FROM_ABI constexpr bool __can_broadcast() { return (std::is_arithmetic<_Up>::value && __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) || (!std::is_arithmetic<_Up>::value && std::is_convertible<_Up, _Tp>::value) || std::is_same<__remove_const_t<_Up>, int>::value || (std::is_same<__remove_const_t<_Up>, unsigned int>::value && std::is_unsigned<_Tp>::value); } template static _LIBCPP_HIDE_FROM_ABI constexpr decltype( std::forward_as_tuple(std::declval<_Generator>()( 
std::integral_constant())...), bool()) __can_generate(std::index_sequence<__indicies...>) { return !experimental::__variadic_sum( !__can_broadcast()( std::integral_constant()))>()...); } template static _LIBCPP_HIDE_FROM_ABI bool __can_generate(...) { return false; } template _LIBCPP_HIDE_FROM_ABI void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) { int __not_used[]{((*this)[__indicies] = __g(std::integral_constant()), 0)...}; (void)__not_used; } public: // implicit type conversion constructor template >::value && __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()>::type> _LIBCPP_HIDE_FROM_ABI simd(const simd<_Up, simd_abi::fixed_size>& __v) { for (size_t __i = 0; __i < size(); __i++) { (*this)[__i] = static_cast<_Tp>(__v[__i]); } } // implicit broadcast constructor template ()>::type> _LIBCPP_HIDE_FROM_ABI simd(_Up&& __rv) { auto __v = static_cast<_Tp>(__rv); for (size_t __i = 0; __i < size(); __i++) { (*this)[__i] = __v; } } // generator constructor template (std::make_index_sequence()), int>::type()> explicit _LIBCPP_HIDE_FROM_ABI simd(_Generator&& __g) { __generator_init(std::forward<_Generator>(__g), std::make_index_sequence()); } // load constructor template < class _Up, class _Flags, class = typename std::enable_if<__vectorizable<_Up>()>::type, class = typename std::enable_if::value>::type> _LIBCPP_HIDE_FROM_ABI simd(const _Up* __buffer, _Flags) { // TODO: optimize for overaligned flags for (size_t __i = 0; __i < size(); __i++) { (*this)[__i] = static_cast<_Tp>(__buffer[__i]); } } // loads [simd.load] template typename std::enable_if<__vectorizable<_Up>() && is_simd_flag_type<_Flags>::value>::type _LIBCPP_HIDE_FROM_ABI copy_from(const _Up* __buffer, _Flags) { *this = simd(__buffer, _Flags()); } // stores [simd.store] template typename std::enable_if<__vectorizable<_Up>() && is_simd_flag_type<_Flags>::value>::type _LIBCPP_HIDE_FROM_ABI copy_to(_Up* __buffer, _Flags) const { // TODO: optimize for overaligned flags for (size_t __i = 
0; __i < size(); __i++) { __buffer[__i] = static_cast<_Up>((*this)[__i]); } } // scalar access [simd.subscr] _LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) { return reference(&__s_, __i); } _LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const { return __s_.__get(__i); } // unary operators [simd.unary] simd& operator++(); simd operator++(int); simd& operator--(); simd operator--(int); mask_type operator!() const; simd operator~() const; simd operator+() const; simd operator-() const; #if 0 // binary operators [simd.binary] friend simd operator+(const simd&, const simd&); friend simd operator-(const simd&, const simd&); friend simd operator*(const simd&, const simd&); friend simd operator/(const simd&, const simd&); friend simd operator%(const simd&, const simd&); friend simd operator&(const simd&, const simd&); friend simd operator|(const simd&, const simd&); friend simd operator^(const simd&, const simd&); friend simd operator<<(const simd&, const simd&); friend simd operator>>(const simd&, const simd&); friend simd operator<<(const simd&, int); friend simd operator>>(const simd&, int); // compound assignment [simd.cassign] friend simd& operator+=(simd&, const simd&); friend simd& operator-=(simd&, const simd&); friend simd& operator*=(simd&, const simd&); friend simd& operator/=(simd&, const simd&); friend simd& operator%=(simd&, const simd&); friend simd& operator&=(simd&, const simd&); friend simd& operator|=(simd&, const simd&); friend simd& operator^=(simd&, const simd&); friend simd& operator<<=(simd&, const simd&); friend simd& operator>>=(simd&, const simd&); friend simd& operator<<=(simd&, int); friend simd& operator>>=(simd&, int); // compares [simd.comparison] friend mask_type operator==(const simd&, const simd&); friend mask_type operator!=(const simd&, const simd&); friend mask_type operator>=(const simd&, const simd&); friend mask_type operator<=(const simd&, const simd&); friend mask_type operator>(const simd&, const simd&); friend 
mask_type operator<(const simd&, const simd&); #endif }; // [simd.mask.class] template // TODO: implement simd_mask class simd_mask { public: using value_type = bool; // TODO: this is strawman implementation. Turn it into a proxy type. using reference = bool&; using simd_type = simd<_Tp, _Abi>; using abi_type = _Abi; static constexpr size_t size() noexcept; _LIBCPP_HIDE_FROM_ABI simd_mask() = default; // broadcast constructor explicit simd_mask(value_type) noexcept; // implicit type conversion constructor template simd_mask(const simd_mask<_Up, simd_abi::fixed_size>&) noexcept; // load constructor template simd_mask(const value_type*, _Flags); // loads [simd.mask.copy] template void copy_from(const value_type*, _Flags); template void copy_to(value_type*, _Flags) const; // scalar access [simd.mask.subscr] reference operator[](size_t); value_type operator[](size_t) const; // unary operators [simd.mask.unary] simd_mask operator!() const noexcept; #if 0 // simd_mask binary operators [simd.mask.binary] friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator&(const simd_mask&, const simd_mask&)noexcept; friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept; // simd_mask compound assignment [simd.mask.cassign] friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept; friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept; friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept; // simd_mask compares [simd.mask.comparison] friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; #endif }; #endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && 
_LIBCPP_STD_VER <= 20 # include # include #endif #endif /* _LIBCPP_EXPERIMENTAL_SIMD */