/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SHA512INTRIN_H #define __SHA512INTRIN_H #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \ __min_vector_width__(256))) /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs an intermediate calculation for the next four /// SHA512 message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG1 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. /// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s0(qword): /// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7) /// } /// W[4] := __B.qword[0] /// W[3] := __A.qword[3] /// W[2] := __A.qword[2] /// W[1] := __A.qword[1] /// W[0] := __A.qword[0] /// dst.qword[3] := W[3] + s0(W[4]) /// dst.qword[2] := W[2] + s0(W[3]) /// dst.qword[1] := W[1] + s0(W[2]) /// dst.qword[0] := W[0] + s0(W[1]) /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B); } /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs the final calculation for the next four SHA512 /// message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. /// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s1(qword) { /// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6) /// } /// W[14] := __B.qword[2] /// W[15] := __B.qword[3] /// W[16] := __A.qword[0] + s1(W[14]) /// W[17] := __A.qword[1] + s1(W[15]) /// W[18] := __A.qword[2] + s1(W[16]) /// W[19] := __A.qword[3] + s1(W[17]) /// dst.qword[3] := W[19] /// dst.qword[2] := W[18] /// dst.qword[1] := W[17] /// dst.qword[0] := W[16] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B); } /// This intrinisc performs two rounds of SHA512 operation using initial SHA512 /// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from /// \a __A, and a pre-computed sum of the next two round message qwords and /// the corresponding round constants from \a __C (only the two lower qwords /// of the third operand). The updated SHA512 state (A,B,E,F) is written to /// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later /// rounds. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \param __C /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. /// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE cap_sigma0(qword) { /// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39) /// } /// DEFINE cap_sigma1(qword) { /// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41) /// } /// DEFINE MAJ(a,b,c) { /// RETURN (a & b) ^ (a & c) ^ (b & c) /// } /// DEFINE CH(e,f,g) { /// RETURN (e & f) ^ (g & ~e) /// } /// A[0] := __B.qword[3] /// B[0] := __B.qword[2] /// C[0] := __C.qword[3] /// D[0] := __C.qword[2] /// E[0] := __B.qword[1] /// F[0] := __B.qword[0] /// G[0] := __C.qword[1] /// H[0] := __C.qword[0] /// WK[0]:= __A.qword[0] /// WK[1]:= __A.qword[1] /// FOR i := 0 to 1: /// A[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + /// MAJ(A[i], B[i], C[i]) + /// cap_sigma0(A[i]) /// B[i+1] := A[i] /// C[i+1] := B[i] /// D[i+1] := C[i] /// E[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + D[i] /// F[i+1] := E[i] /// G[i+1] := F[i] /// H[i+1] := G[i] /// ENDFOR /// dst.qword[3] := A[2] /// dst.qword[2] := B[2] /// dst.qword[1] := E[2] /// dst.qword[0] := F[2] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) { return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B, (__v2du)__C); } #undef __DEFAULT_FN_ATTRS256 #endif // __SHA512INTRIN_H