xref: /freebsd/contrib/llvm-project/clang/lib/Headers/sm3intrin.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric /*===-------------------- sm3intrin.h - SM3 intrinsics ---------------------===
2*06c3fb27SDimitry Andric  *
3*06c3fb27SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric  *
7*06c3fb27SDimitry Andric  *===-----------------------------------------------------------------------===
8*06c3fb27SDimitry Andric  */
9*06c3fb27SDimitry Andric 
10*06c3fb27SDimitry Andric #ifndef __IMMINTRIN_H
11*06c3fb27SDimitry Andric #error "Never use <sm3intrin.h> directly; include <immintrin.h> instead."
12*06c3fb27SDimitry Andric #endif // __IMMINTRIN_H
13*06c3fb27SDimitry Andric 
14*06c3fb27SDimitry Andric #ifndef __SM3INTRIN_H
15*06c3fb27SDimitry Andric #define __SM3INTRIN_H
16*06c3fb27SDimitry Andric 
// Attribute set shared by the inline SM3 intrinsics defined in this header:
// always inlined, no debug info, compiled for the "sm3" target feature, and
// declared with a minimum vector width of 128 bits. Undefined again at the
// end of the header.
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("sm3"),                                            \
                 __min_vector_width__(128)))
20*06c3fb27SDimitry Andric 
/// This intrinsic is one of the two SM3 message scheduling intrinsics. The
22*06c3fb27SDimitry Andric ///    intrinsic performs an initial calculation for the next four SM3 message
23*06c3fb27SDimitry Andric ///    words. The calculated results are stored in \a dst.
24*06c3fb27SDimitry Andric ///
25*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
26*06c3fb27SDimitry Andric ///
27*06c3fb27SDimitry Andric /// \code
28*06c3fb27SDimitry Andric /// __m128i _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C)
29*06c3fb27SDimitry Andric /// \endcode
30*06c3fb27SDimitry Andric ///
31*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSM3MSG1 instruction.
32*06c3fb27SDimitry Andric ///
33*06c3fb27SDimitry Andric /// \param __A
34*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
35*06c3fb27SDimitry Andric /// \param __B
36*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
37*06c3fb27SDimitry Andric /// \param __C
38*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
39*06c3fb27SDimitry Andric /// \returns
40*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
41*06c3fb27SDimitry Andric ///
42*06c3fb27SDimitry Andric /// \code{.operation}
43*06c3fb27SDimitry Andric /// DEFINE ROL32(dword, n) {
44*06c3fb27SDimitry Andric /// 	count := n % 32
45*06c3fb27SDimitry Andric /// 	dest := (dword << count) | (dword >> (32 - count))
46*06c3fb27SDimitry Andric /// 	RETURN dest
47*06c3fb27SDimitry Andric /// }
48*06c3fb27SDimitry Andric /// DEFINE P1(x) {
49*06c3fb27SDimitry Andric /// 	RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23)
50*06c3fb27SDimitry Andric /// }
51*06c3fb27SDimitry Andric /// W[0] := __C.dword[0]
52*06c3fb27SDimitry Andric /// W[1] := __C.dword[1]
53*06c3fb27SDimitry Andric /// W[2] := __C.dword[2]
54*06c3fb27SDimitry Andric /// W[3] := __C.dword[3]
55*06c3fb27SDimitry Andric /// W[7] := __A.dword[0]
56*06c3fb27SDimitry Andric /// W[8] := __A.dword[1]
57*06c3fb27SDimitry Andric /// W[9] := __A.dword[2]
58*06c3fb27SDimitry Andric /// W[10] := __A.dword[3]
59*06c3fb27SDimitry Andric /// W[13] := __B.dword[0]
60*06c3fb27SDimitry Andric /// W[14] := __B.dword[1]
61*06c3fb27SDimitry Andric /// W[15] := __B.dword[2]
62*06c3fb27SDimitry Andric /// TMP0 := W[7] ^ W[0] ^ ROL32(W[13], 15)
63*06c3fb27SDimitry Andric /// TMP1 := W[8] ^ W[1] ^ ROL32(W[14], 15)
64*06c3fb27SDimitry Andric /// TMP2 := W[9] ^ W[2] ^ ROL32(W[15], 15)
65*06c3fb27SDimitry Andric /// TMP3 := W[10] ^ W[3]
66*06c3fb27SDimitry Andric /// dst.dword[0] := P1(TMP0)
67*06c3fb27SDimitry Andric /// dst.dword[1] := P1(TMP1)
68*06c3fb27SDimitry Andric /// dst.dword[2] := P1(TMP2)
69*06c3fb27SDimitry Andric /// dst.dword[3] := P1(TMP3)
70*06c3fb27SDimitry Andric /// dst[MAX:128] := 0
71*06c3fb27SDimitry Andric /// \endcode
_mm_sm3msg1_epi32(__m128i __A,__m128i __B,__m128i __C)72*06c3fb27SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg1_epi32(__m128i __A,
73*06c3fb27SDimitry Andric                                                                   __m128i __B,
74*06c3fb27SDimitry Andric                                                                   __m128i __C) {
75*06c3fb27SDimitry Andric   return (__m128i)__builtin_ia32_vsm3msg1((__v4su)__A, (__v4su)__B,
76*06c3fb27SDimitry Andric                                           (__v4su)__C);
77*06c3fb27SDimitry Andric }
78*06c3fb27SDimitry Andric 
/// This intrinsic is one of the two SM3 message scheduling intrinsics. The
80*06c3fb27SDimitry Andric ///    intrinsic performs the final calculation for the next four SM3 message
81*06c3fb27SDimitry Andric ///    words. The calculated results are stored in \a dst.
82*06c3fb27SDimitry Andric ///
83*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
84*06c3fb27SDimitry Andric ///
85*06c3fb27SDimitry Andric /// \code
86*06c3fb27SDimitry Andric /// __m128i _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C)
87*06c3fb27SDimitry Andric /// \endcode
88*06c3fb27SDimitry Andric ///
89*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSM3MSG2 instruction.
90*06c3fb27SDimitry Andric ///
91*06c3fb27SDimitry Andric /// \param __A
92*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
93*06c3fb27SDimitry Andric /// \param __B
94*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
95*06c3fb27SDimitry Andric /// \param __C
96*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
97*06c3fb27SDimitry Andric /// \returns
98*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
99*06c3fb27SDimitry Andric ///
100*06c3fb27SDimitry Andric /// \code{.operation}
101*06c3fb27SDimitry Andric /// DEFINE ROL32(dword, n) {
102*06c3fb27SDimitry Andric /// 	count := n % 32
103*06c3fb27SDimitry Andric /// 	dest := (dword << count) | (dword >> (32-count))
104*06c3fb27SDimitry Andric /// 	RETURN dest
105*06c3fb27SDimitry Andric /// }
106*06c3fb27SDimitry Andric /// WTMP[0] := __A.dword[0]
107*06c3fb27SDimitry Andric /// WTMP[1] := __A.dword[1]
108*06c3fb27SDimitry Andric /// WTMP[2] := __A.dword[2]
109*06c3fb27SDimitry Andric /// WTMP[3] := __A.dword[3]
110*06c3fb27SDimitry Andric /// W[3] := __B.dword[0]
111*06c3fb27SDimitry Andric /// W[4] := __B.dword[1]
112*06c3fb27SDimitry Andric /// W[5] := __B.dword[2]
113*06c3fb27SDimitry Andric /// W[6] := __B.dword[3]
114*06c3fb27SDimitry Andric /// W[10] := __C.dword[0]
115*06c3fb27SDimitry Andric /// W[11] := __C.dword[1]
116*06c3fb27SDimitry Andric /// W[12] := __C.dword[2]
117*06c3fb27SDimitry Andric /// W[13] := __C.dword[3]
118*06c3fb27SDimitry Andric /// W[16] := ROL32(W[3], 7) ^ W[10] ^ WTMP[0]
119*06c3fb27SDimitry Andric /// W[17] := ROL32(W[4], 7) ^ W[11] ^ WTMP[1]
120*06c3fb27SDimitry Andric /// W[18] := ROL32(W[5], 7) ^ W[12] ^ WTMP[2]
121*06c3fb27SDimitry Andric /// W[19] := ROL32(W[6], 7) ^ W[13] ^ WTMP[3]
122*06c3fb27SDimitry Andric /// W[19] := W[19] ^ ROL32(W[16], 6) ^ ROL32(W[16], 15) ^ ROL32(W[16], 30)
123*06c3fb27SDimitry Andric /// dst.dword[0] := W[16]
124*06c3fb27SDimitry Andric /// dst.dword[1] := W[17]
125*06c3fb27SDimitry Andric /// dst.dword[2] := W[18]
126*06c3fb27SDimitry Andric /// dst.dword[3] := W[19]
127*06c3fb27SDimitry Andric /// dst[MAX:128] := 0
128*06c3fb27SDimitry Andric /// \endcode
_mm_sm3msg2_epi32(__m128i __A,__m128i __B,__m128i __C)129*06c3fb27SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg2_epi32(__m128i __A,
130*06c3fb27SDimitry Andric                                                                   __m128i __B,
131*06c3fb27SDimitry Andric                                                                   __m128i __C) {
132*06c3fb27SDimitry Andric   return (__m128i)__builtin_ia32_vsm3msg2((__v4su)__A, (__v4su)__B,
133*06c3fb27SDimitry Andric                                           (__v4su)__C);
134*06c3fb27SDimitry Andric }
135*06c3fb27SDimitry Andric 
/// This intrinsic performs two rounds of SM3 operation using the initial SM3
///    state (C, D, G, H) from \a __A, the initial SM3 state (A, B, E, F)
///    from \a __B and pre-computed words from \a __C. \a __A with
///    initial SM3 state of (C, D, G, H) assumes input of non-rotated left
///    variables from previous state. The updated SM3 state (A, B, E, F) is
///    written to \a __A. The \a imm8 should contain the even round number
///    for the first of the two rounds computed by this instruction. The
///    computation masks the \a imm8 value by AND'ing it with 0x3E so that only
///    even round numbers from 0 through 62 are used for this operation. The
///    calculated results are stored in \a dst.
146*06c3fb27SDimitry Andric ///
147*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
148*06c3fb27SDimitry Andric ///
149*06c3fb27SDimitry Andric /// \code
/// __m128i _mm_sm3rnds2_epi32(__m128i __A, __m128i __B, __m128i __C,
///                            const int imm8)
/// \endcode
152*06c3fb27SDimitry Andric ///
153*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VSM3RNDS2 instruction.
154*06c3fb27SDimitry Andric ///
155*06c3fb27SDimitry Andric /// \param __A
156*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
157*06c3fb27SDimitry Andric /// \param __B
158*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
159*06c3fb27SDimitry Andric /// \param __C
160*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
161*06c3fb27SDimitry Andric /// \param imm8
///    An 8-bit constant integer.
163*06c3fb27SDimitry Andric /// \returns
164*06c3fb27SDimitry Andric ///    A 128-bit vector of [4 x int].
165*06c3fb27SDimitry Andric ///
166*06c3fb27SDimitry Andric /// \code{.operation}
167*06c3fb27SDimitry Andric /// DEFINE ROL32(dword, n) {
168*06c3fb27SDimitry Andric /// 	count := n % 32
169*06c3fb27SDimitry Andric /// 	dest := (dword << count) | (dword >> (32-count))
170*06c3fb27SDimitry Andric /// 	RETURN dest
171*06c3fb27SDimitry Andric /// }
172*06c3fb27SDimitry Andric /// DEFINE P0(dword) {
173*06c3fb27SDimitry Andric /// 	RETURN dword ^ ROL32(dword, 9) ^ ROL32(dword, 17)
174*06c3fb27SDimitry Andric /// }
175*06c3fb27SDimitry Andric /// DEFINE FF(x,y,z, round){
176*06c3fb27SDimitry Andric /// 	IF round < 16
177*06c3fb27SDimitry Andric /// 		RETURN (x ^ y ^ z)
178*06c3fb27SDimitry Andric /// 	ELSE
179*06c3fb27SDimitry Andric /// 		RETURN (x & y) | (x & z) | (y & z)
180*06c3fb27SDimitry Andric /// 	FI
181*06c3fb27SDimitry Andric /// }
182*06c3fb27SDimitry Andric /// DEFINE GG(x, y, z, round){
183*06c3fb27SDimitry Andric ///   IF round < 16
184*06c3fb27SDimitry Andric ///   	RETURN (x ^ y ^ z)
185*06c3fb27SDimitry Andric ///   ELSE
186*06c3fb27SDimitry Andric ///   	RETURN (x & y) | (~x & z)
187*06c3fb27SDimitry Andric ///   FI
188*06c3fb27SDimitry Andric /// }
189*06c3fb27SDimitry Andric /// A[0] := __B.dword[3]
190*06c3fb27SDimitry Andric /// B[0] := __B.dword[2]
191*06c3fb27SDimitry Andric /// C[0] := __A.dword[3]
192*06c3fb27SDimitry Andric /// D[0] := __A.dword[2]
193*06c3fb27SDimitry Andric /// E[0] := __B.dword[1]
194*06c3fb27SDimitry Andric /// F[0] := __B.dword[0]
195*06c3fb27SDimitry Andric /// G[0] := __A.dword[1]
196*06c3fb27SDimitry Andric /// H[0] := __A.dword[0]
197*06c3fb27SDimitry Andric /// W[0] := __C.dword[0]
198*06c3fb27SDimitry Andric /// W[1] := __C.dword[1]
199*06c3fb27SDimitry Andric /// W[4] := __C.dword[2]
200*06c3fb27SDimitry Andric /// W[5] := __C.dword[3]
201*06c3fb27SDimitry Andric /// C[0] := ROL32(C[0], 9)
202*06c3fb27SDimitry Andric /// D[0] := ROL32(D[0], 9)
203*06c3fb27SDimitry Andric /// G[0] := ROL32(G[0], 19)
204*06c3fb27SDimitry Andric /// H[0] := ROL32(H[0], 19)
/// ROUND := imm8 & 0x3E
206*06c3fb27SDimitry Andric /// IF ROUND < 16
207*06c3fb27SDimitry Andric /// 	CONST := 0x79CC4519
208*06c3fb27SDimitry Andric /// ELSE
209*06c3fb27SDimitry Andric /// 	CONST := 0x7A879D8A
210*06c3fb27SDimitry Andric /// FI
211*06c3fb27SDimitry Andric /// CONST := ROL32(CONST,ROUND)
212*06c3fb27SDimitry Andric /// FOR i:= 0 to 1
213*06c3fb27SDimitry Andric /// 	S1 := ROL32((ROL32(A[i], 12) + E[i] + CONST), 7)
214*06c3fb27SDimitry Andric /// 	S2 := S1 ^ ROL32(A[i], 12)
215*06c3fb27SDimitry Andric /// 	T1 := FF(A[i], B[i], C[i], ROUND) + D[i] + S2 + (W[i] ^ W[i+4])
216*06c3fb27SDimitry Andric /// 	T2 := GG(E[i], F[i], G[i], ROUND) + H[i] + S1 + W[i]
217*06c3fb27SDimitry Andric /// 	D[i+1] := C[i]
218*06c3fb27SDimitry Andric /// 	C[i+1] := ROL32(B[i],9)
219*06c3fb27SDimitry Andric /// 	B[i+1] := A[i]
220*06c3fb27SDimitry Andric /// 	A[i+1] := T1
221*06c3fb27SDimitry Andric /// 	H[i+1] := G[i]
222*06c3fb27SDimitry Andric /// 	G[i+1] := ROL32(F[i], 19)
223*06c3fb27SDimitry Andric /// 	F[i+1] := E[i]
224*06c3fb27SDimitry Andric /// 	E[i+1] := P0(T2)
225*06c3fb27SDimitry Andric /// 	CONST := ROL32(CONST, 1)
226*06c3fb27SDimitry Andric /// ENDFOR
227*06c3fb27SDimitry Andric /// dst.dword[3] := A[2]
228*06c3fb27SDimitry Andric /// dst.dword[2] := B[2]
229*06c3fb27SDimitry Andric /// dst.dword[1] := E[2]
230*06c3fb27SDimitry Andric /// dst.dword[0] := F[2]
231*06c3fb27SDimitry Andric /// dst[MAX:128] := 0
232*06c3fb27SDimitry Andric /// \endcode
// Implemented as a macro so that D is passed straight through to the builtin;
// the documentation above describes it as an 8-bit constant integer. Each
// argument and the whole expansion are parenthesized so that the casts apply
// to the complete argument expressions and the result can be used safely
// inside a larger expression.
#define _mm_sm3rnds2_epi32(A, B, C, D)                                         \
  ((__m128i)__builtin_ia32_vsm3rnds2((__v4su)(A), (__v4su)(B), (__v4su)(C),    \
                                     (int)(D)))
235*06c3fb27SDimitry Andric 
236*06c3fb27SDimitry Andric #undef __DEFAULT_FN_ATTRS128
237*06c3fb27SDimitry Andric 
238*06c3fb27SDimitry Andric #endif // __SM3INTRIN_H
239