Lines Matching +full:2 +full:- +full:bit

1 /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
21 /// Computes a multiply-add of 128-bit vectors of [4 x float].
29 /// A 128-bit vector of [4 x float] containing the multiplicand.
31 /// A 128-bit vector of [4 x float] containing the multiplier.
33 /// A 128-bit vector of [4 x float] containing the addend.
34 /// \returns A 128-bit vector of [4 x float] containing the result.
41 /// Computes a multiply-add of 128-bit vectors of [2 x double].
49 /// A 128-bit vector of [2 x double] containing the multiplicand.
51 /// A 128-bit vector of [2 x double] containing the multiplier.
53 /// A 128-bit vector of [2 x double] containing the addend.
54 /// \returns A 128-bit vector of [2 x double] containing the result.
61 /// Computes a scalar multiply-add of the single-precision values in the
62 /// low 32 bits of 128-bit vectors of [4 x float].
74 /// A 128-bit vector of [4 x float] containing the multiplicand in the low
77 /// A 128-bit vector of [4 x float] containing the multiplier in the low
80 /// A 128-bit vector of [4 x float] containing the addend in the low
82 /// \returns A 128-bit vector of [4 x float] containing the result in the low
90 /// Computes a scalar multiply-add of the double-precision values in the
91 /// low 64 bits of 128-bit vectors of [2 x double].
103 /// A 128-bit vector of [2 x double] containing the multiplicand in the low
106 /// A 128-bit vector of [2 x double] containing the multiplier in the low
109 /// A 128-bit vector of [2 x double] containing the addend in the low
111 /// \returns A 128-bit vector of [2 x double] containing the result in the low
119 /// Computes a multiply-subtract of 128-bit vectors of [4 x float].
120 /// For each element, computes <c> (__A * __B) - __C </c>.
127 /// A 128-bit vector of [4 x float] containing the multiplicand.
129 /// A 128-bit vector of [4 x float] containing the multiplier.
131 /// A 128-bit vector of [4 x float] containing the subtrahend.
132 /// \returns A 128-bit vector of [4 x float] containing the result.
136 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); in _mm_fmsub_ps()
139 /// Computes a multiply-subtract of 128-bit vectors of [2 x double].
140 /// For each element, computes <c> (__A * __B) - __C </c>.
147 /// A 128-bit vector of [2 x double] containing the multiplicand.
149 /// A 128-bit vector of [2 x double] containing the multiplier.
151 /// A 128-bit vector of [2 x double] containing the subtrahend.
152 /// \returns A 128-bit vector of [2 x double] containing the result.
156 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); in _mm_fmsub_pd()
159 /// Computes a scalar multiply-subtract of the single-precision values in
160 /// the low 32 bits of 128-bit vectors of [4 x float].
163 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
172 /// A 128-bit vector of [4 x float] containing the multiplicand in the low
175 /// A 128-bit vector of [4 x float] containing the multiplier in the low
178 /// A 128-bit vector of [4 x float] containing the subtrahend in the low
180 /// \returns A 128-bit vector of [4 x float] containing the result in the low
185 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); in _mm_fmsub_ss()
188 /// Computes a scalar multiply-subtract of the double-precision values in
189 /// the low 64 bits of 128-bit vectors of [2 x double].
192 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
201 /// A 128-bit vector of [2 x double] containing the multiplicand in the low
204 /// A 128-bit vector of [2 x double] containing the multiplier in the low
207 /// A 128-bit vector of [2 x double] containing the subtrahend in the low
209 /// \returns A 128-bit vector of [2 x double] containing the result in the low
214 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); in _mm_fmsub_sd()
217 /// Computes a negated multiply-add of 128-bit vectors of [4 x float].
218 /// For each element, computes <c> -(__A * __B) + __C </c>.
225 /// A 128-bit vector of [4 x float] containing the multiplicand.
227 /// A 128-bit vector of [4 x float] containing the multiplier.
229 /// A 128-bit vector of [4 x float] containing the addend.
230 /// \returns A 128-bit vector of [4 x float] containing the result.
234 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); in _mm_fnmadd_ps()
237 /// Computes a negated multiply-add of 128-bit vectors of [2 x double].
238 /// For each element, computes <c> -(__A * __B) + __C </c>.
245 /// A 128-bit vector of [2 x double] containing the multiplicand.
247 /// A 128-bit vector of [2 x double] containing the multiplier.
249 /// A 128-bit vector of [2 x double] containing the addend.
250 /// \returns A 128-bit vector of [2 x double] containing the result.
254 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); in _mm_fnmadd_pd()
257 /// Computes a scalar negated multiply-add of the single-precision values in
258 /// the low 32 bits of 128-bit vectors of [4 x float].
261 /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
270 /// A 128-bit vector of [4 x float] containing the multiplicand in the low
273 /// A 128-bit vector of [4 x float] containing the multiplier in the low
276 /// A 128-bit vector of [4 x float] containing the addend in the low
278 /// \returns A 128-bit vector of [4 x float] containing the result in the low
283 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); in _mm_fnmadd_ss()
286 /// Computes a scalar negated multiply-add of the double-precision values
287 /// in the low 64 bits of 128-bit vectors of [2 x double].
290 /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
299 /// A 128-bit vector of [2 x double] containing the multiplicand in the low
302 /// A 128-bit vector of [2 x double] containing the multiplier in the low
305 /// A 128-bit vector of [2 x double] containing the addend in the low
307 /// \returns A 128-bit vector of [2 x double] containing the result in the low
312 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); in _mm_fnmadd_sd()
315 /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
316 /// For each element, computes <c> -(__A * __B) - __C </c>.
323 /// A 128-bit vector of [4 x float] containing the multiplicand.
325 /// A 128-bit vector of [4 x float] containing the multiplier.
327 /// A 128-bit vector of [4 x float] containing the subtrahend.
328 /// \returns A 128-bit vector of [4 x float] containing the result.
332 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); in _mm_fnmsub_ps()
335 /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
336 /// For each element, computes <c> -(__A * __B) - __C </c>.
343 /// A 128-bit vector of [2 x double] containing the multiplicand.
345 /// A 128-bit vector of [2 x double] containing the multiplier.
347 /// A 128-bit vector of [2 x double] containing the subtrahend.
348 /// \returns A 128-bit vector of [2 x double] containing the result.
352 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); in _mm_fnmsub_pd()
355 /// Computes a scalar negated multiply-subtract of the single-precision
356 /// values in the low 32 bits of 128-bit vectors of [4 x float].
359 /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
368 /// A 128-bit vector of [4 x float] containing the multiplicand in the low
371 /// A 128-bit vector of [4 x float] containing the multiplier in the low
374 /// A 128-bit vector of [4 x float] containing the subtrahend in the low
376 /// \returns A 128-bit vector of [4 x float] containing the result in the low
381 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); in _mm_fnmsub_ss()
384 /// Computes a scalar negated multiply-subtract of the double-precision
385 /// values in the low 64 bits of 128-bit vectors of [2 x double].
388 /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
397 /// A 128-bit vector of [2 x double] containing the multiplicand in the low
400 /// A 128-bit vector of [2 x double] containing the multiplier in the low
403 /// A 128-bit vector of [2 x double] containing the subtrahend in the low
405 /// \returns A 128-bit vector of [2 x double] containing the result in the low
410 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); in _mm_fnmsub_sd()
413 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
417 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
419 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
428 /// A 128-bit vector of [4 x float] containing the multiplicand.
430 /// A 128-bit vector of [4 x float] containing the multiplier.
432 /// A 128-bit vector of [4 x float] containing the addend/subtrahend.
433 /// \returns A 128-bit vector of [4 x float] containing the result.
440 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
441 /// [2 x double].
444 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
453 /// A 128-bit vector of [2 x double] containing the multiplicand.
455 /// A 128-bit vector of [2 x double] containing the multiplier.
457 /// A 128-bit vector of [2 x double] containing the addend/subtrahend.
458 /// \returns A 128-bit vector of [2 x double] containing the result.
465 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
470 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
472 /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
480 /// A 128-bit vector of [4 x float] containing the multiplicand.
482 /// A 128-bit vector of [4 x float] containing the multiplier.
484 /// A 128-bit vector of [4 x float] containing the addend/subtrahend.
485 /// \returns A 128-bit vector of [4 x float] containing the result.
489 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); in _mm_fmsubadd_ps()
492 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
493 /// [2 x double].
497 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
505 /// A 128-bit vector of [2 x double] containing the multiplicand.
507 /// A 128-bit vector of [2 x double] containing the multiplier.
509 /// A 128-bit vector of [2 x double] containing the addend/subtrahend.
510 /// \returns A 128-bit vector of [2 x double] containing the result.
514 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); in _mm_fmsubadd_pd()
517 /// Computes a multiply-add of 256-bit vectors of [8 x float].
525 /// A 256-bit vector of [8 x float] containing the multiplicand.
527 /// A 256-bit vector of [8 x float] containing the multiplier.
529 /// A 256-bit vector of [8 x float] containing the addend.
530 /// \returns A 256-bit vector of [8 x float] containing the result.
537 /// Computes a multiply-add of 256-bit vectors of [4 x double].
545 /// A 256-bit vector of [4 x double] containing the multiplicand.
547 /// A 256-bit vector of [4 x double] containing the multiplier.
549 /// A 256-bit vector of [4 x double] containing the addend.
550 /// \returns A 256-bit vector of [4 x double] containing the result.
557 /// Computes a multiply-subtract of 256-bit vectors of [8 x float].
558 /// For each element, computes <c> (__A * __B) - __C </c>.
565 /// A 256-bit vector of [8 x float] containing the multiplicand.
567 /// A 256-bit vector of [8 x float] containing the multiplier.
569 /// A 256-bit vector of [8 x float] containing the subtrahend.
570 /// \returns A 256-bit vector of [8 x float] containing the result.
574 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); in _mm256_fmsub_ps()
577 /// Computes a multiply-subtract of 256-bit vectors of [4 x double].
578 /// For each element, computes <c> (__A * __B) - __C </c>.
585 /// A 256-bit vector of [4 x double] containing the multiplicand.
587 /// A 256-bit vector of [4 x double] containing the multiplier.
589 /// A 256-bit vector of [4 x double] containing the subtrahend.
590 /// \returns A 256-bit vector of [4 x double] containing the result.
594 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); in _mm256_fmsub_pd()
597 /// Computes a negated multiply-add of 256-bit vectors of [8 x float].
598 /// For each element, computes <c> -(__A * __B) + __C </c>.
605 /// A 256-bit vector of [8 x float] containing the multiplicand.
607 /// A 256-bit vector of [8 x float] containing the multiplier.
609 /// A 256-bit vector of [8 x float] containing the addend.
610 /// \returns A 256-bit vector of [8 x float] containing the result.
614 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); in _mm256_fnmadd_ps()
617 /// Computes a negated multiply-add of 256-bit vectors of [4 x double].
618 /// For each element, computes <c> -(__A * __B) + __C </c>.
625 /// A 256-bit vector of [4 x double] containing the multiplicand.
627 /// A 256-bit vector of [4 x double] containing the multiplier.
629 /// A 256-bit vector of [4 x double] containing the addend.
630 /// \returns A 256-bit vector of [4 x double] containing the result.
634 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); in _mm256_fnmadd_pd()
637 /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
638 /// For each element, computes <c> -(__A * __B) - __C </c>.
645 /// A 256-bit vector of [8 x float] containing the multiplicand.
647 /// A 256-bit vector of [8 x float] containing the multiplier.
649 /// A 256-bit vector of [8 x float] containing the subtrahend.
650 /// \returns A 256-bit vector of [8 x float] containing the result.
654 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); in _mm256_fnmsub_ps()
657 /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
658 /// For each element, computes <c> -(__A * __B) - __C </c>.
665 /// A 256-bit vector of [4 x double] containing the multiplicand.
667 /// A 256-bit vector of [4 x double] containing the multiplier.
669 /// A 256-bit vector of [4 x double] containing the subtrahend.
670 /// \returns A 256-bit vector of [4 x double] containing the result.
674 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); in _mm256_fnmsub_pd()
677 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
681 /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
683 /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
685 /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128]
687 /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192]
696 /// A 256-bit vector of [8 x float] containing the multiplicand.
698 /// A 256-bit vector of [8 x float] containing the multiplier.
700 /// A 256-bit vector of [8 x float] containing the addend/subtrahend.
701 /// \returns A 256-bit vector of [8 x float] containing the result.
708 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
712 /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
714 /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
723 /// A 256-bit vector of [4 x double] containing the multiplicand.
725 /// A 256-bit vector of [4 x double] containing the multiplier.
727 /// A 256-bit vector of [4 x double] containing the addend/subtrahend.
728 /// \returns A 256-bit vector of [4 x double] containing the result.
735 /// Computes a vector multiply with alternating add/subtract of 256-bit
740 /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
742 /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
744 /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160]
746 /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224]
754 /// A 256-bit vector of [8 x float] containing the multiplicand.
756 /// A 256-bit vector of [8 x float] containing the multiplier.
758 /// A 256-bit vector of [8 x float] containing the addend/subtrahend.
759 /// \returns A 256-bit vector of [8 x float] containing the result.
763 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); in _mm256_fmsubadd_ps()
766 /// Computes a vector multiply with alternating add/subtract of 256-bit
771 /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
773 /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192]
781 /// A 256-bit vector of [4 x double] containing the multiplicand.
783 /// A 256-bit vector of [4 x double] containing the multiplier.
785 /// A 256-bit vector of [4 x double] containing the addend/subtrahend.
786 /// \returns A 256-bit vector of [4 x double] containing the result.
790 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); in _mm256_fmsubadd_pd()