Lines Matching +full:128 +full:m

58                  __min_vector_width__(128)))
402 /// __m256d _mm256_round_pd(__m256d V, const int M);
409 /// \param M
416 /// 0: Use bits [1:0] of \a M. \n
424 #define _mm256_round_pd(V, M) \ argument
425 ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
434 /// __m256 _mm256_round_ps(__m256 V, const int M);
441 /// \param M
448 /// 0: Use bits [1:0] of \a M. \n
456 #define _mm256_round_ps(V, M) \ argument
457 ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))
773 /// Copies the values in a 128-bit vector of [2 x double] as specified
774 /// by the 128-bit integer vector operand.
781 /// A 128-bit vector of [2 x double].
783 /// A 128-bit integer vector operand specifying how the values are to be
795 /// \returns A 128-bit vector of [2 x double] containing the copied values.
825 /// 0: Bits [191:128] of the source are copied to bits [191:128] of the
827 /// 1: Bits [255:192] of the source are copied to bits [191:128] of the
830 /// 0: Bits [191:128] of the source are copied to bits [255:192] of the
841 /// Copies the values stored in a 128-bit vector of [4 x float] as
842 /// specified by the 128-bit integer vector operand.
849 /// A 128-bit vector of [4 x float].
851 /// A 128-bit integer vector operand specifying how the values are to be
889 /// \returns A 128-bit vector of [4 x float] containing the copied values.
944 /// Bits [129:128]: \n
945 /// 00: Bits [159:128] of the source are copied to bits [159:128] of the
947 /// 01: Bits [191:160] of the source are copied to bits [159:128] of the
949 /// 10: Bits [223:192] of the source are copied to bits [159:128] of the
951 /// 11: Bits [255:224] of the source are copied to bits [159:128] of the
954 /// 00: Bits [159:128] of the source are copied to bits [191:160] of the
963 /// 00: Bits [159:128] of the source are copied to bits [223:192] of the
972 /// 00: Bits [159:128] of the source are copied to bits [255:224] of the
987 /// Copies the values in a 128-bit vector of [2 x double] as specified
999 /// A 128-bit vector of [2 x double].
1013 /// \returns A 128-bit vector of [2 x double] containing the copied values.
1044 /// 0: Bits [191:128] of the source are copied to bits [191:128] of the
1046 /// 1: Bits [255:192] of the source are copied to bits [191:128] of the
1049 /// 0: Bits [191:128] of the source are copied to bits [255:192] of the
1057 /// Copies the values in a 128-bit vector of [4 x float] as specified by
1069 /// A 128-bit vector of [4 x float].
1109 /// \returns A 128-bit vector of [4 x float] containing the copied values.
1166 /// 00: Bits [159:128] of the source are copied to bits [159:128] of the
1168 /// 01: Bits [191:160] of the source are copied to bits [159:128] of the
1170 /// 10: Bits [223:192] of the source are copied to bits [159:128] of the
1172 /// 11: Bits [255:224] of the source are copied to bits [159:128] of the
1175 /// 00: Bits [159:128] of the source are copied to bits [191:160] of the
1184 /// 00: Bits [159:128] of the source are copied to bits [223:192] of the
1193 /// 00: Bits [159:128] of the source are copied to bits [255:224] of the
1205 /// Permutes 128-bit data values stored in two 256-bit vectors of
1211 /// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);
1220 /// \param M
1226 /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1230 /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1233 /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1235 /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1237 /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1239 /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1242 #define _mm256_permute2f128_pd(V1, V2, M) \ argument
1244 (__v4df)(__m256d)(V2), (int)(M)))
1246 /// Permutes 128-bit data values stored in two 256-bit vectors of
1252 /// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);
1261 /// \param M
1267 /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1271 /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1274 /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1276 /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1278 /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1280 /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1283 #define _mm256_permute2f128_ps(V1, V2, M) \ argument
1285 (__v8sf)(__m256)(V2), (int)(M)))
1287 /// Permutes 128-bit data values stored in two 256-bit integer vectors,
1293 /// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);
1302 /// \param M
1307 /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
1311 /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
1314 /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
1316 /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
1318 /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
1320 /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
1323 #define _mm256_permute2f128_si256(V1, V2, M) \ argument
1325 (__v8si)(__m256i)(V2), (int)(M)))
1335 /// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);
1344 /// \param M
1352 #define _mm256_blend_pd(V1, V2, M) \ argument
1354 (__v4df)(__m256d)(V2), (int)(M)))
1363 /// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);
1372 /// \param M
1380 #define _mm256_blend_ps(V1, V2, M) \ argument
1382 (__v8sf)(__m256)(V2), (int)(M)))
1455 /// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);
1464 /// \param M
1478 #define _mm256_dp_ps(V1, V2, M) \ argument
1480 (__v8sf)(__m256)(V2), (M)))
1489 /// bits [191:128] of the destination, and the selected elements from the
1505 /// operand are copied to bits [63:0] and bits [191:128] in the destination,
1518 /// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the
1527 /// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n
1543 /// [63:0] and bits [191:128] in the destination, and the selected elements
1572 /// Bit [2]=0: Bits [191:128] are copied from \a a to bits [191:128] of the
1574 /// Bit [2]=1: Bits [255:192] are copied from \a a to bits [191:128] of the
1576 /// Bit [3]=0: Bits [191:128] are copied from \a b to bits [255:192] of the
1613 /// 128-bit vectors of [2 x double], using the operation specified by the
1629 /// A 128-bit vector of [2 x double].
1631 /// A 128-bit vector of [2 x double].
1667 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
1671 /// Compares each of the corresponding values of two 128-bit vectors of
1688 /// A 128-bit vector of [4 x float].
1690 /// A 128-bit vector of [4 x float].
1726 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1851 /// two 128-bit vectors of [2 x double], using the operation specified by the
1867 /// A 128-bit vector of [2 x double].
1869 /// A 128-bit vector of [2 x double].
1905 /// \returns A 128-bit vector of [2 x double] containing the comparison results.
1909 /// Compares each of the corresponding scalar values of two 128-bit
1926 /// A 128-bit vector of [4 x float].
1928 /// A 128-bit vector of [4 x float].
1964 /// \returns A 128-bit vector of [4 x float] containing the comparison results.
2174 /// A 128-bit integer vector of [4 x i32].
2197 /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2206 /// \returns A 128-bit vector of [4 x float] containing the converted values.
2232 /// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4
2240 /// A 128-bit vector of [4 x float].
2249 /// (rounded toward zero) 32-bit integers returned in a 128-bit vector of
2262 /// \returns A 128-bit integer vector containing the converted values.
2269 /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2282 /// \returns A 128-bit integer vector containing the converted values.
2370 /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2395 /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2421 /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2442 /// Bits [255:192] are written to bits [191:128] of the return value. \n
2464 /// Bits [191:128] are written to bits [191:128] of the return value.
2468 /// Bits [191:128] are written to bits [255:192] of the return value. \n
2488 /// Bits [223:192] are written to bits [159:128] of the return value. \n
2515 /// Bits [159:128] are written to bits [159:128] of the return value. \n
2521 /// Bits [159:128] are written to bits [191:160] of the return value. \n
2531 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2550 /// A 128-bit vector of [2 x double].
2552 /// A 128-bit vector of [2 x double].
2560 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2579 /// A 128-bit vector of [2 x double].
2581 /// A 128-bit vector of [2 x double].
2589 /// Given two 128-bit floating-point vectors of [2 x double], perform an
2609 /// A 128-bit vector of [2 x double].
2611 /// A 128-bit vector of [2 x double].
2619 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2638 /// A 128-bit vector of [4 x float].
2640 /// A 128-bit vector of [4 x float].
2648 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2667 /// A 128-bit vector of [4 x float].
2669 /// A 128-bit vector of [4 x float].
2677 /// Given two 128-bit floating-point vectors of [4 x float], perform an
2697 /// A 128-bit vector of [4 x float].
2699 /// A 128-bit vector of [4 x float].
3011 /// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
3033 /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set
3089 /// Loads the data from a 128-bit vector of [2 x double] from the
3090 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3098 /// The 128-bit vector of [2 x double] to be broadcast.
3099 /// \returns A 256-bit vector of [4 x double] whose 128-bit elements are set
3109 /// Loads the data from a 128-bit vector of [4 x float] from the
3110 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3118 /// The 128-bit vector of [4 x float] to be broadcast.
3119 /// \returns A 256-bit vector of [8 x float] whose 128-bit elements are set
3376 /// memory location pointed to by \a __p into a 128-bit vector of
3388 /// A 128-bit integer vector containing the mask. The most significant bit of
3392 /// \returns A 128-bit vector of [2 x double] containing the loaded values.
3425 /// memory location pointed to by \a __p into a 128-bit vector of
3437 /// A 128-bit integer vector containing the mask. The most significant bit of
3441 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
3497 /// Moves double-precision values from a 128-bit vector of [2 x double]
3508 /// A 128-bit integer vector containing the mask. The most significant bit of
3514 /// A 128-bit vector of [2 x double] containing the values to be stored.
3545 /// Moves single-precision floating point values from a 128-bit vector
3556 /// A 128-bit integer vector containing the mask. The most significant bit of
3562 /// A 128-bit vector of [4 x float] containing the values to be stored.
3683 /// A double-precision floating-point value used to initialize bits [191:128]
3716 /// A single-precision floating-point value used to initialize bits [159:128]
3753 /// A 32-bit integral value used to initialize bits [159:128] of the result.
3793 /// A 16-bit integral value used to initialize bits [143:128] of the result.
3860 /// An 8-bit integral value used to initialize bits [135:128] of the result.
3923 /// A 64-bit integral value used to initialize bits [191:128] of the result.
3952 /// A double-precision floating-point value used to initialize bits [191:128]
3986 /// A single-precision floating-point value used to initialize bits [159:128]
4022 /// A 32-bit integral value used to initialize bits [159:128] of the result.
4062 /// A 16-bit integral value used to initialize bits [143:128] of the result.
4131 /// An 8-bit integral value used to initialize bits [135:128] of the result.
4192 /// A 64-bit integral value used to initialize bits [191:128] of the result.
4461 /// Returns the lower 128 bits of a 256-bit floating-point vector of
4462 /// [4 x double] as a 128-bit floating-point vector of [2 x double].
4470 /// \returns A 128-bit floating-point vector of [2 x double] containing the
4471 /// lower 128 bits of the parameter.
4478 /// Returns the lower 128 bits of a 256-bit floating-point vector of
4479 /// [8 x float] as a 128-bit floating-point vector of [4 x float].
4487 /// \returns A 128-bit floating-point vector of [4 x float] containing the
4488 /// lower 128 bits of the parameter.
4495 /// Truncates a 256-bit integer vector into a 128-bit integer vector.
4503 /// \returns A 128-bit integer vector containing the lower 128 bits of the
4512 /// 128-bit floating-point vector of [2 x double].
4514 /// The lower 128 bits contain the value of the source vector. The contents
4515 /// of the upper 128 bits are undefined.
4522 /// A 128-bit vector of [2 x double].
4523 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4524 /// contain the value of the parameter. The contents of the upper 128 bits
4534 /// 128-bit floating-point vector of [4 x float].
4536 /// The lower 128 bits contain the value of the source vector. The contents
4537 /// of the upper 128 bits are undefined.
4544 /// A 128-bit vector of [4 x float].
4545 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4546 /// contain the value of the parameter. The contents of the upper 128 bits
4556 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4558 /// The lower 128 bits contain the value of the source vector. The contents
4559 /// of the upper 128 bits are undefined.
4566 /// A 128-bit integer vector.
4567 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4568 /// the parameter. The contents of the upper 128 bits are undefined.
4577 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
4578 /// contain the value of the source vector. The upper 128 bits are set
4586 /// A 128-bit vector of [2 x double].
4587 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4588 /// contain the value of the parameter. The upper 128 bits are set to zero.
4596 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
4597 /// the value of the source vector. The upper 128 bits are set to zero.
4604 /// A 128-bit vector of [4 x float].
4605 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4606 /// contain the value of the parameter. The upper 128 bits are set to zero.
4613 /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4614 /// The lower 128 bits contain the value of the source vector. The upper
4615 /// 128 bits are set to zero.
4622 /// A 128-bit integer vector.
4623 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4624 /// the parameter. The upper 128 bits are set to zero.
4634 invocations where the immediate M is a constant expression.
4638 /// replacing either the upper or the lower 128 bits with the contents of a
4639 /// 128-bit vector of [4 x float] in the second parameter.
4642 /// 128 bits.
4647 /// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);
4654 /// first, and then either the upper or the lower 128 bits of the result will
4657 /// A 128-bit vector of [4 x float]. The contents of this parameter are
4658 /// written to either the upper or the lower 128 bits of the result depending
4659 /// on the value of parameter \a M.
4660 /// \param M
4663 /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result,
4664 /// and bits [255:128] of \a V1 are copied to bits [255:128] of the
4666 /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the
4670 #define _mm256_insertf128_ps(V1, V2, M) \ argument
4672 (__v4sf)(__m128)(V2), (int)(M)))
4676 /// replacing either the upper or the lower 128 bits with the contents of a
4677 /// 128-bit vector of [2 x double] in the second parameter.
4680 /// 128 bits.
4685 /// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);
4692 /// first, and then either the upper or the lower 128 bits of the result will
4695 /// A 128-bit vector of [2 x double]. The contents of this parameter are
4696 /// written to either the upper or the lower 128 bits of the result depending
4697 /// on the value of parameter \a M.
4698 /// \param M
4701 /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result,
4702 /// and bits [255:128] of \a V1 are copied to bits [255:128] of the
4704 /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the
4708 #define _mm256_insertf128_pd(V1, V2, M) \ argument
4710 (__v2df)(__m128d)(V2), (int)(M)))
4714 /// either the upper or the lower 128 bits with the contents of a 128-bit
4718 /// 128 bits.
4723 /// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);
4730 /// then either the upper or the lower 128 bits of the result will be
4733 /// A 128-bit integer vector. The contents of this parameter are written to
4734 /// either the upper or the lower 128 bits of the result depending on the
4735 /// value of parameter \a M.
4736 /// \param M
4739 /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result,
4740 /// and bits [255:128] of \a V1 are copied to bits [255:128] of the
4742 /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the
4746 #define _mm256_insertf128_si256(V1, V2, M) \ argument
4748 (__v4si)(__m128i)(V2), (int)(M)))
4753 invocations where the immediate M is a constant expression.
4755 /// Extracts either the upper or the lower 128 bits from a 256-bit vector
4757 /// returns the extracted bits as a 128-bit vector of [4 x float].
4762 /// __m128 _mm256_extractf128_ps(__m256 V, const int M);
4769 /// \param M
4772 /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4774 /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4775 /// \returns A 128-bit vector of [4 x float] containing the extracted bits.
4776 #define _mm256_extractf128_ps(V, M) \ argument
4777 ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)))
4779 /// Extracts either the upper or the lower 128 bits from a 256-bit vector
4781 /// returns the extracted bits as a 128-bit vector of [2 x double].
4786 /// __m128d _mm256_extractf128_pd(__m256d V, const int M);
4793 /// \param M
4796 /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4798 /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4799 /// \returns A 128-bit vector of [2 x double] containing the extracted bits.
4800 #define _mm256_extractf128_pd(V, M) \ argument
4801 ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)))
4803 /// Extracts either the upper or the lower 128 bits from a 256-bit
4805 /// returns the extracted bits as a 128-bit integer vector.
4810 /// __m128i _mm256_extractf128_si256(__m256i V, const int M);
4817 /// \param M
4820 /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
4822 /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
4823 /// \returns A 128-bit integer vector containing the extracted bits.
4824 #define _mm256_extractf128_si256(V, M) \ argument
4825 ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M)))
4828 /// concatenating two 128-bit floating-point vectors of [4 x float].
4835 /// A 128-bit floating-point vector of [4 x float] to be copied to the upper
4836 /// 128 bits of the result.
4838 /// A 128-bit floating-point vector of [4 x float] to be copied to the lower
4839 /// 128 bits of the result.
4849 /// concatenating two 128-bit floating-point vectors of [2 x double].
4856 /// A 128-bit floating-point vector of [2 x double] to be copied to the upper
4857 /// 128 bits of the result.
4859 /// A 128-bit floating-point vector of [2 x double] to be copied to the lower
4860 /// 128 bits of the result.
4869 /// Constructs a 256-bit integer vector by concatenating two 128-bit
4877 /// A 128-bit integer vector to be copied to the upper 128 bits of the
4880 /// A 128-bit integer vector to be copied to the lower 128 bits of the
4890 /// concatenating two 128-bit floating-point vectors of [4 x float]. This is
4899 /// A 128-bit floating-point vector of [4 x float] to be copied to the lower
4900 /// 128 bits of the result.
4902 /// A 128-bit floating-point vector of [4 x float] to be copied to the upper
4903 /// 128 bits of the result.
4913 /// concatenating two 128-bit floating-point vectors of [2 x double]. This is
4922 /// A 128-bit floating-point vector of [2 x double] to be copied to the lower
4923 /// 128 bits of the result.
4925 /// A 128-bit floating-point vector of [2 x double] to be copied to the upper
4926 /// 128 bits of the result.
4935 /// Constructs a 256-bit integer vector by concatenating two 128-bit
4944 /// A 128-bit integer vector to be copied to the lower 128 bits of the
4947 /// A 128-bit integer vector to be copied to the upper 128 bits of the
4957 /// Loads two 128-bit floating-point vectors of [4 x float] from
4959 /// of [8 x float] by concatenating the two 128-bit vectors.
4967 /// A pointer to a 128-bit memory location containing 4 consecutive
4969 /// bits[255:128] of the result. The address of the memory location does not
4972 /// A pointer to a 128-bit memory location containing 4 consecutive
4984 /// Loads two 128-bit floating-point vectors of [2 x double] from
4986 /// of [4 x double] by concatenating the two 128-bit vectors.
4994 /// A pointer to a 128-bit memory location containing two consecutive
4996 /// bits[255:128] of the result. The address of the memory location does not
4999 /// A pointer to a 128-bit memory location containing two consecutive
5011 /// Loads two 128-bit integer vectors from unaligned memory locations and
5012 /// constructs a 256-bit integer vector by concatenating the two 128-bit
5021 /// A pointer to a 128-bit memory location containing a 128-bit integer
5022 /// vector. This vector is to be copied to bits[255:128] of the result. The
5025 /// A pointer to a 128-bit memory location containing a 128-bit integer
5036 /// Stores the upper and lower 128 bits of a 256-bit floating-point
5045 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5049 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5065 /// Stores the upper and lower 128 bits of a 256-bit floating-point
5074 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5078 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5094 /// Stores the upper and lower 128 bits of a 256-bit integer vector into
5103 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5107 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be