fmaintrin.h - OpenGrok cross reference for /freebsd/contrib/llvm-project/clang/lib/Headers/fmaintrin.h

Lines Matching +full:2 +full:- +full:bit
1 /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7  *===-----------------------------------------------------------------------===
21 /// Computes a multiply-add of 128-bit vectors of [4 x float].
29 ///    A 128-bit vector of [4 x float] containing the multiplicand.
31 ///    A 128-bit vector of [4 x float] containing the multiplier.
33 ///    A 128-bit vector of [4 x float] containing the addend.
34 /// \returns A 128-bit vector of [4 x float] containing the result.
41 /// Computes a multiply-add of 128-bit vectors of [2 x double].
49 ///    A 128-bit vector of [2 x double] containing the multiplicand.
51 ///    A 128-bit vector of [2 x double] containing the multiplier.
53 ///    A 128-bit vector of [2 x double] containing the addend.
54 /// \returns A 128-bit vector of [2 x double] containing the result.
61 /// Computes a scalar multiply-add of the single-precision values in the
62 ///    low 32 bits of 128-bit vectors of [4 x float].
74 ///    A 128-bit vector of [4 x float] containing the multiplicand in the low
77 ///    A 128-bit vector of [4 x float] containing the multiplier in the low
80 ///    A 128-bit vector of [4 x float] containing the addend in the low
82 /// \returns A 128-bit vector of [4 x float] containing the result in the low
90 /// Computes a scalar multiply-add of the double-precision values in the
91 ///    low 64 bits of 128-bit vectors of [2 x double].
103 ///    A 128-bit vector of [2 x double] containing the multiplicand in the low
106 ///    A 128-bit vector of [2 x double] containing the multiplier in the low
109 ///    A 128-bit vector of [2 x double] containing the addend in the low
111 /// \returns A 128-bit vector of [2 x double] containing the result in the low
119 /// Computes a multiply-subtract of 128-bit vectors of [4 x float].
120 ///    For each element, computes <c> (__A * __B) - __C </c>.
127 ///    A 128-bit vector of [4 x float] containing the multiplicand.
129 ///    A 128-bit vector of [4 x float] containing the multiplier.
131 ///    A 128-bit vector of [4 x float] containing the subtrahend.
132 /// \returns A 128-bit vector of [4 x float] containing the result.
136   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);  in _mm_fmsub_ps()
139 /// Computes a multiply-subtract of 128-bit vectors of [2 x double].
140 ///    For each element, computes <c> (__A * __B) - __C </c>.
147 ///    A 128-bit vector of [2 x double] containing the multiplicand.
149 ///    A 128-bit vector of [2 x double] containing the multiplier.
151 ///    A 128-bit vector of [2 x double] containing the subtrahend.
152 /// \returns A 128-bit vector of [2 x double] containing the result.
156   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);  in _mm_fmsub_pd()
159 /// Computes a scalar multiply-subtract of the single-precision values in
160 ///    the low 32 bits of 128-bit vectors of [4 x float].
163 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
172 ///    A 128-bit vector of [4 x float] containing the multiplicand in the low
175 ///    A 128-bit vector of [4 x float] containing the multiplier in the low
178 ///    A 128-bit vector of [4 x float] containing the subtrahend in the low
180 /// \returns A 128-bit vector of [4 x float] containing the result in the low
185   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);  in _mm_fmsub_ss()
188 /// Computes a scalar multiply-subtract of the double-precision values in
189 ///    the low 64 bits of 128-bit vectors of [2 x double].
192 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
201 ///    A 128-bit vector of [2 x double] containing the multiplicand in the low
204 ///    A 128-bit vector of [2 x double] containing the multiplier in the low
207 ///    A 128-bit vector of [2 x double] containing the subtrahend in the low
209 /// \returns A 128-bit vector of [2 x double] containing the result in the low
214   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);  in _mm_fmsub_sd()
217 /// Computes a negated multiply-add of 128-bit vectors of [4 x float].
218 ///    For each element, computes <c> -(__A * __B) + __C </c>.
225 ///    A 128-bit vector of [4 x float] containing the multiplicand.
227 ///    A 128-bit vector of [4 x float] containing the multiplier.
229 ///    A 128-bit vector of [4 x float] containing the addend.
230 /// \returns A 128-bit vector of [4 x float] containing the result.
234   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);  in _mm_fnmadd_ps()
237 /// Computes a negated multiply-add of 128-bit vectors of [2 x double].
238 ///    For each element, computes <c> -(__A * __B) + __C </c>.
245 ///    A 128-bit vector of [2 x double] containing the multiplicand.
247 ///    A 128-bit vector of [2 x double] containing the multiplier.
249 ///    A 128-bit vector of [2 x double] containing the addend.
250 /// \returns A 128-bit vector of [2 x double] containing the result.
254   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);  in _mm_fnmadd_pd()
257 /// Computes a scalar negated multiply-add of the single-precision values in
258 ///    the low 32 bits of 128-bit vectors of [4 x float].
261 /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
270 ///    A 128-bit vector of [4 x float] containing the multiplicand in the low
273 ///    A 128-bit vector of [4 x float] containing the multiplier in the low
276 ///    A 128-bit vector of [4 x float] containing the addend in the low
278 /// \returns A 128-bit vector of [4 x float] containing the result in the low
283   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);  in _mm_fnmadd_ss()
286 /// Computes a scalar negated multiply-add of the double-precision values
287 ///    in the low 64 bits of 128-bit vectors of [2 x double].
290 /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
299 ///    A 128-bit vector of [2 x double] containing the multiplicand in the low
302 ///    A 128-bit vector of [2 x double] containing the multiplier in the low
305 ///    A 128-bit vector of [2 x double] containing the addend in the low
307 /// \returns A 128-bit vector of [2 x double] containing the result in the low
312   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);  in _mm_fnmadd_sd()
315 /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
316 ///    For each element, computes <c> -(__A * __B) - __C </c>.
323 ///    A 128-bit vector of [4 x float] containing the multiplicand.
325 ///    A 128-bit vector of [4 x float] containing the multiplier.
327 ///    A 128-bit vector of [4 x float] containing the subtrahend.
328 /// \returns A 128-bit vector of [4 x float] containing the result.
332   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);  in _mm_fnmsub_ps()
335 /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
336 ///    For each element, computes <c> -(__A * __B) - __C </c>.
343 ///    A 128-bit vector of [2 x double] containing the multiplicand.
345 ///    A 128-bit vector of [2 x double] containing the multiplier.
347 ///    A 128-bit vector of [2 x double] containing the subtrahend.
348 /// \returns A 128-bit vector of [2 x double] containing the result.
352   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);  in _mm_fnmsub_pd()
355 /// Computes a scalar negated multiply-subtract of the single-precision
356 ///    values in the low 32 bits of 128-bit vectors of [4 x float].
359 /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
368 ///    A 128-bit vector of [4 x float] containing the multiplicand in the low
371 ///    A 128-bit vector of [4 x float] containing the multiplier in the low
374 ///    A 128-bit vector of [4 x float] containing the subtrahend in the low
376 /// \returns A 128-bit vector of [4 x float] containing the result in the low
381   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);  in _mm_fnmsub_ss()
384 /// Computes a scalar negated multiply-subtract of the double-precision
385 ///    values in the low 64 bits of 128-bit vectors of [2 x double].
388 /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
397 ///    A 128-bit vector of [2 x double] containing the multiplicand in the low
400 ///    A 128-bit vector of [2 x double] containing the multiplier in the low
403 ///    A 128-bit vector of [2 x double] containing the subtrahend in the low
405 /// \returns A 128-bit vector of [2 x double] containing the result in the low
410   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);  in _mm_fnmsub_sd()
413 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
417 /// result[31:0]  = (__A[31:0] * __B[31:0]) - __C[31:0]
419 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
428 ///    A 128-bit vector of [4 x float] containing the multiplicand.
430 ///    A 128-bit vector of [4 x float] containing the multiplier.
432 ///    A 128-bit vector of [4 x float] containing the addend/subtrahend.
433 /// \returns A 128-bit vector of [4 x float] containing the result.
440 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
441 ///    [2 x double].
444 /// result[63:0]  = (__A[63:0] * __B[63:0]) - __C[63:0]
453 ///    A 128-bit vector of [2 x double] containing the multiplicand.
455 ///    A 128-bit vector of [2 x double] containing the multiplier.
457 ///    A 128-bit vector of [2 x double] containing the addend/subtrahend.
458 /// \returns A 128-bit vector of [2 x double] containing the result.
465 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
470 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
472 /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
480 ///    A 128-bit vector of [4 x float] containing the multiplicand.
482 ///    A 128-bit vector of [4 x float] containing the multiplier.
484 ///    A 128-bit vector of [4 x float] containing the addend/subtrahend.
485 /// \returns A 128-bit vector of [4 x float] containing the result.
489   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);  in _mm_fmsubadd_ps()
492 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
493 ///    [2 x double].
497 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
505 ///    A 128-bit vector of [2 x double] containing the multiplicand.
507 ///    A 128-bit vector of [2 x double] containing the multiplier.
509 ///    A 128-bit vector of [2 x double] containing the addend/subtrahend.
510 /// \returns A 128-bit vector of [2 x double] containing the result.
514   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);  in _mm_fmsubadd_pd()
517 /// Computes a multiply-add of 256-bit vectors of [8 x float].
525 ///    A 256-bit vector of [8 x float] containing the multiplicand.
527 ///    A 256-bit vector of [8 x float] containing the multiplier.
529 ///    A 256-bit vector of [8 x float] containing the addend.
530 /// \returns A 256-bit vector of [8 x float] containing the result.
537 /// Computes a multiply-add of 256-bit vectors of [4 x double].
545 ///    A 256-bit vector of [4 x double] containing the multiplicand.
547 ///    A 256-bit vector of [4 x double] containing the multiplier.
549 ///    A 256-bit vector of [4 x double] containing the addend.
550 /// \returns A 256-bit vector of [4 x double] containing the result.
557 /// Computes a multiply-subtract of 256-bit vectors of [8 x float].
558 ///    For each element, computes <c> (__A * __B) - __C </c>.
565 ///    A 256-bit vector of [8 x float] containing the multiplicand.
567 ///    A 256-bit vector of [8 x float] containing the multiplier.
569 ///    A 256-bit vector of [8 x float] containing the subtrahend.
570 /// \returns A 256-bit vector of [8 x float] containing the result.
574   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);  in _mm256_fmsub_ps()
577 /// Computes a multiply-subtract of 256-bit vectors of [4 x double].
578 ///    For each element, computes <c> (__A * __B) - __C </c>.
585 ///    A 256-bit vector of [4 x double] containing the multiplicand.
587 ///    A 256-bit vector of [4 x double] containing the multiplier.
589 ///    A 256-bit vector of [4 x double] containing the subtrahend.
590 /// \returns A 256-bit vector of [4 x double] containing the result.
594   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);  in _mm256_fmsub_pd()
597 /// Computes a negated multiply-add of 256-bit vectors of [8 x float].
598 ///    For each element, computes <c> -(__A * __B) + __C </c>.
605 ///    A 256-bit vector of [8 x float] containing the multiplicand.
607 ///    A 256-bit vector of [8 x float] containing the multiplier.
609 ///    A 256-bit vector of [8 x float] containing the addend.
610 /// \returns A 256-bit vector of [8 x float] containing the result.
614   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);  in _mm256_fnmadd_ps()
617 /// Computes a negated multiply-add of 256-bit vectors of [4 x double].
618 ///    For each element, computes <c> -(__A * __B) + __C </c>.
625 ///    A 256-bit vector of [4 x double] containing the multiplicand.
627 ///    A 256-bit vector of [4 x double] containing the multiplier.
629 ///    A 256-bit vector of [4 x double] containing the addend.
630 /// \returns A 256-bit vector of [4 x double] containing the result.
634   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);  in _mm256_fnmadd_pd()
637 /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
638 ///    For each element, computes <c> -(__A * __B) - __C </c>.
645 ///    A 256-bit vector of [8 x float] containing the multiplicand.
647 ///    A 256-bit vector of [8 x float] containing the multiplier.
649 ///    A 256-bit vector of [8 x float] containing the subtrahend.
650 /// \returns A 256-bit vector of [8 x float] containing the result.
654   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);  in _mm256_fnmsub_ps()
657 /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
658 ///    For each element, computes <c> -(__A * __B) - __C </c>.
665 ///    A 256-bit vector of [4 x double] containing the multiplicand.
667 ///    A 256-bit vector of [4 x double] containing the multiplier.
669 ///    A 256-bit vector of [4 x double] containing the subtrahend.
670 /// \returns A 256-bit vector of [4 x double] containing the result.
674   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);  in _mm256_fnmsub_pd()
677 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
681 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
683 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
685 /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128]
687 /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192]
696 ///    A 256-bit vector of [8 x float] containing the multiplicand.
698 ///    A 256-bit vector of [8 x float] containing the multiplier.
700 ///    A 256-bit vector of [8 x float] containing the addend/subtrahend.
701 /// \returns A 256-bit vector of [8 x float] containing the result.
708 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
712 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
714 /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
723 ///    A 256-bit vector of [4 x double] containing the multiplicand.
725 ///    A 256-bit vector of [4 x double] containing the multiplier.
727 ///    A 256-bit vector of [4 x double] containing the addend/subtrahend.
728 /// \returns A 256-bit vector of [4 x double] containing the result.
735 /// Computes a vector multiply with alternating add/subtract of 256-bit
740 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
742 /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
744 /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160]
746 /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224]
754 ///    A 256-bit vector of [8 x float] containing the multiplicand.
756 ///    A 256-bit vector of [8 x float] containing the multiplier.
758 ///    A 256-bit vector of [8 x float] containing the addend/subtrahend.
759 /// \returns A 256-bit vector of [8 x float] containing the result.
763   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);  in _mm256_fmsubadd_ps()
766 /// Computes a vector multiply with alternating add/subtract of 256-bit
771 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
773 /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192]
781 ///    A 256-bit vector of [4 x double] containing the multiplicand.
783 ///    A 256-bit vector of [4 x double] containing the multiplier.
785 ///    A 256-bit vector of [4 x double] containing the addend/subtrahend.
786 /// \returns A 256-bit vector of [4 x double] containing the result.
790   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);  in _mm256_fmsubadd_pd()