xref: /illumos-gate/usr/src/common/crypto/sha2/sha2.c (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 /*
6  * Copyright 2013 Saso Kiselkov.  All rights reserved.
7  * Copyright 2024 Bill Sommerfeld <sommerfeld@hamachi.org>
8  */
9 
10 /*
11  * The basic framework for this code came from the reference
12  * implementation for MD5.  That implementation is Copyright (C)
13  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
14  *
15  * License to copy and use this software is granted provided that it
16  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
17  * Algorithm" in all material mentioning or referencing this software
18  * or this function.
19  *
20  * License is also granted to make and use derivative works provided
21  * that such works are identified as "derived from the RSA Data
22  * Security, Inc. MD5 Message-Digest Algorithm" in all material
23  * mentioning or referencing the derived work.
24  *
25  * RSA Data Security, Inc. makes no representations concerning either
26  * the merchantability of this software or the suitability of this
27  * software for any particular purpose. It is provided "as is"
28  * without express or implied warranty of any kind.
29  *
30  * These notices must be retained in any copies of any part of this
31  * documentation and/or software.
32  *
33  * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
34  * standard, available at
35  * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
36  * Not as fast as one would like -- further optimizations are encouraged
37  * and appreciated.
38  */
39 
40 #ifndef _KERNEL
41 #include <stdint.h>
42 #include <strings.h>
43 #include <stdlib.h>
44 #include <errno.h>
45 #endif /* _KERNEL */
46 
47 #include <sys/types.h>
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/sysmacros.h>
51 #define	_SHA2_IMPL
52 #include <sys/sha2.h>
53 #include <sys/sha2_consts.h>
54 
55 #ifdef _KERNEL
56 #include <sys/cmn_err.h>
57 
58 #else
59 #pragma weak SHA256Update = SHA2Update
60 #pragma weak SHA384Update = SHA2Update
61 #pragma weak SHA512Update = SHA2Update
62 
63 #pragma weak SHA256Final = SHA2Final
64 #pragma weak SHA384Final = SHA2Final
65 #pragma weak SHA512Final = SHA2Final
66 
67 #endif	/* _KERNEL */
68 
69 #ifdef _LITTLE_ENDIAN
70 #include <sys/byteorder.h>
71 #define	HAVE_HTONL
72 #endif
73 
74 static void Encode(uint8_t *, uint32_t *, size_t);
75 static void Encode64(uint8_t *, uint64_t *, size_t);
76 
77 #if	defined(__amd64)
78 #define	SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
79 #define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
80 
81 void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
82 void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
83 
84 #else
85 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
86 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
87 #endif	/* __amd64 */
88 
89 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
90 
91 /* Ch and Maj are the basic SHA2 functions. */
92 #define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
93 #define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
94 
95 /* Rotates x right n bits. */
96 #define	ROTR(x, n)	\
97 	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
98 
99 /* Shift x right n bits */
100 #define	SHR(x, n)	((x) >> (n))
101 
102 /* SHA256 Functions */
103 #define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
104 #define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
105 #define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
106 #define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
107 
108 #define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
109 	T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;	\
110 	d += T1;							\
111 	T2 = BIGSIGMA0_256(a) + Maj(a, b, c);				\
112 	h = T1 + T2
113 
114 /* SHA384/512 Functions */
115 #define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
116 #define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
117 #define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
118 #define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
119 #define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
120 	T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;	\
121 	d += T1;							\
122 	T2 = BIGSIGMA0(a) + Maj(a, b, c);				\
123 	h = T1 + T2
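
/*
 * As a worked example of the round structure: with 32-bit operands,
 * ROTR(x, 2) expands to ((x >> 2) | (x << 30)), and the first round,
 * SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0), computes
 *
 *	T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(0) + w0;
 *	d += T1;
 *	T2 = BIGSIGMA0_256(a) + Maj(a, b, c);
 *	h = T1 + T2;
 *
 * The rotated argument order of successive round invocations below takes
 * the place of the usual a..h register shuffle, so no per-round variable
 * copying is needed.
 */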
124 
125 /*
126  * SPARC optimization:
127  *
128  * On SPARC, we can load big endian 32-bit data with a single aligned
129  * load.  Note that the address must be 32-bit aligned; in the interest
130  * of speed we don't check this, since careful programming by the
131  * callers guarantees it for us.
132  */
133 
134 #if	defined(_BIG_ENDIAN)
135 #define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
136 #define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))
137 
138 #elif	defined(HAVE_HTONL)
139 #define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
140 #define	LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
141 
142 #else
143 /* little endian -- will work on big endian, but slowly */
144 #define	LOAD_BIG_32(addr)	\
145 	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
146 #define	LOAD_BIG_64(addr)	\
147 	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
148 	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
149 	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
150 	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
151 #endif	/* _BIG_ENDIAN */
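
/*
 * Whichever variant is selected, the result is the same: for the byte
 * sequence { 0x01, 0x02, 0x03, 0x04 } at addr, LOAD_BIG_32(addr) yields
 * 0x01020304 on both big and little endian hosts; only the cost differs
 * (a direct load, a byte-swapped load, or four shifts and ORs).
 */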
152 
153 
154 #if	!defined(__amd64)
155 /* SHA256 Transform */
156 
157 static void
158 SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
159 {
160 	uint32_t a = ctx->state.s32[0];
161 	uint32_t b = ctx->state.s32[1];
162 	uint32_t c = ctx->state.s32[2];
163 	uint32_t d = ctx->state.s32[3];
164 	uint32_t e = ctx->state.s32[4];
165 	uint32_t f = ctx->state.s32[5];
166 	uint32_t g = ctx->state.s32[6];
167 	uint32_t h = ctx->state.s32[7];
168 
169 	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
170 	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
171 	uint32_t T1, T2;
172 
173 #if	defined(__sparc)
174 	static const uint32_t sha256_consts[] = {
175 		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
176 		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
177 		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
178 		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
179 		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
180 		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
181 		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
182 		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
183 		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
184 		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
185 		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
186 		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
187 		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
188 		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
189 		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
190 		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
191 		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
192 		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
193 		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
194 		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
195 		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
196 		SHA256_CONST_63
197 	};
198 #endif	/* __sparc */
199 
200 	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
201 		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
202 		blk = (uint8_t *)ctx->buf_un.buf32;
203 	}
204 
205 	/* LINTED E_BAD_PTR_CAST_ALIGN */
206 	w0 =  LOAD_BIG_32(blk + 4 * 0);
207 	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
208 	/* LINTED E_BAD_PTR_CAST_ALIGN */
209 	w1 =  LOAD_BIG_32(blk + 4 * 1);
210 	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
211 	/* LINTED E_BAD_PTR_CAST_ALIGN */
212 	w2 =  LOAD_BIG_32(blk + 4 * 2);
213 	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
214 	/* LINTED E_BAD_PTR_CAST_ALIGN */
215 	w3 =  LOAD_BIG_32(blk + 4 * 3);
216 	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
217 	/* LINTED E_BAD_PTR_CAST_ALIGN */
218 	w4 =  LOAD_BIG_32(blk + 4 * 4);
219 	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
220 	/* LINTED E_BAD_PTR_CAST_ALIGN */
221 	w5 =  LOAD_BIG_32(blk + 4 * 5);
222 	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
223 	/* LINTED E_BAD_PTR_CAST_ALIGN */
224 	w6 =  LOAD_BIG_32(blk + 4 * 6);
225 	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
226 	/* LINTED E_BAD_PTR_CAST_ALIGN */
227 	w7 =  LOAD_BIG_32(blk + 4 * 7);
228 	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
229 	/* LINTED E_BAD_PTR_CAST_ALIGN */
230 	w8 =  LOAD_BIG_32(blk + 4 * 8);
231 	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
232 	/* LINTED E_BAD_PTR_CAST_ALIGN */
233 	w9 =  LOAD_BIG_32(blk + 4 * 9);
234 	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
235 	/* LINTED E_BAD_PTR_CAST_ALIGN */
236 	w10 =  LOAD_BIG_32(blk + 4 * 10);
237 	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
238 	/* LINTED E_BAD_PTR_CAST_ALIGN */
239 	w11 =  LOAD_BIG_32(blk + 4 * 11);
240 	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
241 	/* LINTED E_BAD_PTR_CAST_ALIGN */
242 	w12 =  LOAD_BIG_32(blk + 4 * 12);
243 	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
244 	/* LINTED E_BAD_PTR_CAST_ALIGN */
245 	w13 =  LOAD_BIG_32(blk + 4 * 13);
246 	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
247 	/* LINTED E_BAD_PTR_CAST_ALIGN */
248 	w14 =  LOAD_BIG_32(blk + 4 * 14);
249 	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
250 	/* LINTED E_BAD_PTR_CAST_ALIGN */
251 	w15 =  LOAD_BIG_32(blk + 4 * 15);
252 	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
253 
254 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
255 	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
256 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
257 	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
258 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
259 	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
260 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
261 	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
262 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
263 	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
264 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
265 	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
266 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
267 	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
268 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
269 	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
270 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
271 	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
272 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
273 	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
274 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
275 	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
276 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
277 	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
278 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
279 	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
280 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
281 	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
282 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
283 	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
284 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
285 	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
286 
287 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
288 	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
289 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
290 	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
291 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
292 	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
293 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
294 	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
295 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
296 	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
297 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
298 	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
299 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
300 	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
301 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
302 	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
303 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
304 	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
305 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
306 	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
307 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
308 	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
309 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
310 	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
311 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
312 	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
313 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
314 	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
315 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
316 	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
317 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
318 	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
319 
320 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
321 	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
322 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
323 	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
324 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
325 	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
326 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
327 	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
328 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
329 	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
330 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
331 	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
332 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
333 	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
334 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
335 	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
336 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
337 	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
338 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
339 	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
340 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
341 	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
342 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
343 	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
344 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
345 	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
346 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
347 	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
348 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
349 	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
350 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
351 	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
352 
353 	ctx->state.s32[0] += a;
354 	ctx->state.s32[1] += b;
355 	ctx->state.s32[2] += c;
356 	ctx->state.s32[3] += d;
357 	ctx->state.s32[4] += e;
358 	ctx->state.s32[5] += f;
359 	ctx->state.s32[6] += g;
360 	ctx->state.s32[7] += h;
361 }
362 
363 
364 /* SHA384 and SHA512 Transform */
365 
366 static void
367 SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
368 {
369 
370 	uint64_t a = ctx->state.s64[0];
371 	uint64_t b = ctx->state.s64[1];
372 	uint64_t c = ctx->state.s64[2];
373 	uint64_t d = ctx->state.s64[3];
374 	uint64_t e = ctx->state.s64[4];
375 	uint64_t f = ctx->state.s64[5];
376 	uint64_t g = ctx->state.s64[6];
377 	uint64_t h = ctx->state.s64[7];
378 
379 	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
380 	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
381 	uint64_t T1, T2;
382 
383 #if	defined(__sparc)
384 	static const uint64_t sha512_consts[] = {
385 		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
386 		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
387 		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
388 		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
389 		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
390 		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
391 		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
392 		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
393 		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
394 		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
395 		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
396 		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
397 		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
398 		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
399 		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
400 		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
401 		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
402 		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
403 		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
404 		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
405 		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
406 		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
407 		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
408 		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
409 		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
410 		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
411 		SHA512_CONST_78, SHA512_CONST_79
412 	};
413 #endif	/* __sparc */
414 
415 
416 	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
417 		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
418 		blk = (uint8_t *)ctx->buf_un.buf64;
419 	}
420 
421 	/* LINTED E_BAD_PTR_CAST_ALIGN */
422 	w0 =  LOAD_BIG_64(blk + 8 * 0);
423 	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
424 	/* LINTED E_BAD_PTR_CAST_ALIGN */
425 	w1 =  LOAD_BIG_64(blk + 8 * 1);
426 	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
427 	/* LINTED E_BAD_PTR_CAST_ALIGN */
428 	w2 =  LOAD_BIG_64(blk + 8 * 2);
429 	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
430 	/* LINTED E_BAD_PTR_CAST_ALIGN */
431 	w3 =  LOAD_BIG_64(blk + 8 * 3);
432 	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
433 	/* LINTED E_BAD_PTR_CAST_ALIGN */
434 	w4 =  LOAD_BIG_64(blk + 8 * 4);
435 	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
436 	/* LINTED E_BAD_PTR_CAST_ALIGN */
437 	w5 =  LOAD_BIG_64(blk + 8 * 5);
438 	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
439 	/* LINTED E_BAD_PTR_CAST_ALIGN */
440 	w6 =  LOAD_BIG_64(blk + 8 * 6);
441 	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
442 	/* LINTED E_BAD_PTR_CAST_ALIGN */
443 	w7 =  LOAD_BIG_64(blk + 8 * 7);
444 	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
445 	/* LINTED E_BAD_PTR_CAST_ALIGN */
446 	w8 =  LOAD_BIG_64(blk + 8 * 8);
447 	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
448 	/* LINTED E_BAD_PTR_CAST_ALIGN */
449 	w9 =  LOAD_BIG_64(blk + 8 * 9);
450 	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
451 	/* LINTED E_BAD_PTR_CAST_ALIGN */
452 	w10 =  LOAD_BIG_64(blk + 8 * 10);
453 	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
454 	/* LINTED E_BAD_PTR_CAST_ALIGN */
455 	w11 =  LOAD_BIG_64(blk + 8 * 11);
456 	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
457 	/* LINTED E_BAD_PTR_CAST_ALIGN */
458 	w12 =  LOAD_BIG_64(blk + 8 * 12);
459 	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
460 	/* LINTED E_BAD_PTR_CAST_ALIGN */
461 	w13 =  LOAD_BIG_64(blk + 8 * 13);
462 	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
463 	/* LINTED E_BAD_PTR_CAST_ALIGN */
464 	w14 =  LOAD_BIG_64(blk + 8 * 14);
465 	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
466 	/* LINTED E_BAD_PTR_CAST_ALIGN */
467 	w15 =  LOAD_BIG_64(blk + 8 * 15);
468 	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
469 
470 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
471 	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
472 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
473 	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
474 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
475 	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
476 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
477 	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
478 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
479 	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
480 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
481 	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
482 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
483 	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
484 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
485 	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
486 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
487 	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
488 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
489 	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
490 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
491 	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
492 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
493 	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
494 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
495 	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
496 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
497 	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
498 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
499 	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
500 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
501 	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
502 
503 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
504 	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
505 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
506 	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
507 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
508 	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
509 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
510 	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
511 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
512 	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
513 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
514 	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
515 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
516 	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
517 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
518 	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
519 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
520 	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
521 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
522 	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
523 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
524 	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
525 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
526 	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
527 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
528 	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
529 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
530 	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
531 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
532 	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
533 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
534 	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
535 
536 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
537 	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
538 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
539 	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
540 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
541 	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
542 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
543 	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
544 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
545 	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
546 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
547 	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
548 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
549 	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
550 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
551 	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
552 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
553 	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
554 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
555 	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
556 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
557 	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
558 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
559 	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
560 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
561 	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
562 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
563 	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
564 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
565 	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
566 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
567 	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
568 
569 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
570 	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
571 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
572 	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
573 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
574 	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
575 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
576 	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
577 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
578 	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
579 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
580 	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
581 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
582 	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
583 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
584 	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
585 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
586 	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
587 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
588 	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
589 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
590 	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
591 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
592 	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
593 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
594 	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
595 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
596 	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
597 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
598 	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
599 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
600 	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
601 
602 	ctx->state.s64[0] += a;
603 	ctx->state.s64[1] += b;
604 	ctx->state.s64[2] += c;
605 	ctx->state.s64[3] += d;
606 	ctx->state.s64[4] += e;
607 	ctx->state.s64[5] += f;
608 	ctx->state.s64[6] += g;
609 	ctx->state.s64[7] += h;
610 
611 }
612 #endif	/* !__amd64 */
613 
614 
615 /*
616  * Encode()
617  *
618  * purpose: to convert a list of numbers from host byte order to big endian
619  *   input: uint8_t *	: place to store the converted big endian numbers
620  *	    uint32_t *	: place to get numbers to convert from
621  *          size_t	: the length of the input in bytes
622  *  output: void
623  */
624 
625 static void
626 Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
627     size_t len)
628 {
629 	size_t		i, j;
630 
631 #if	defined(__sparc)
632 	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
633 		for (i = 0, j = 0; j < len; i++, j += 4) {
634 			/* LINTED E_BAD_PTR_CAST_ALIGN */
635 			*((uint32_t *)(output + j)) = input[i];
636 		}
637 	} else {
638 #endif	/* little endian -- will work on big endian, but slowly */
639 		for (i = 0, j = 0; j < len; i++, j += 4) {
640 			output[j]	= (input[i] >> 24) & 0xff;
641 			output[j + 1]	= (input[i] >> 16) & 0xff;
642 			output[j + 2]	= (input[i] >>  8) & 0xff;
643 			output[j + 3]	= input[i] & 0xff;
644 		}
645 #if	defined(__sparc)
646 	}
647 #endif
648 }
649 
650 static void
651 Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
652     size_t len)
653 {
654 	size_t		i, j;
655 
656 #if	defined(__sparc)
657 	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
658 		for (i = 0, j = 0; j < len; i++, j += 8) {
659 			/* LINTED E_BAD_PTR_CAST_ALIGN */
660 			*((uint64_t *)(output + j)) = input[i];
661 		}
662 	} else {
663 #endif	/* little endian -- will work on big endian, but slowly */
664 		for (i = 0, j = 0; j < len; i++, j += 8) {
665 
666 			output[j]	= (input[i] >> 56) & 0xff;
667 			output[j + 1]	= (input[i] >> 48) & 0xff;
668 			output[j + 2]	= (input[i] >> 40) & 0xff;
669 			output[j + 3]	= (input[i] >> 32) & 0xff;
670 			output[j + 4]	= (input[i] >> 24) & 0xff;
671 			output[j + 5]	= (input[i] >> 16) & 0xff;
672 			output[j + 6]	= (input[i] >>  8) & 0xff;
673 			output[j + 7]	= input[i] & 0xff;
674 		}
675 #if	defined(__sparc)
676 	}
677 #endif
678 }
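
/*
 * For example, encoding the single state word 0x6a09e667 produces the
 * byte sequence { 0x6a, 0x09, 0xe6, 0x67 }; Encode64() does the same for
 * 64-bit words.  This is how SHA2Final() serializes both the bit count
 * and the final state in the big-endian byte order the standard calls
 * for.
 */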
679 
680 
681 void
682 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
683 {
684 
685 	switch (mech) {
686 	case SHA256_MECH_INFO_TYPE:
687 	case SHA256_HMAC_MECH_INFO_TYPE:
688 	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
689 		ctx->state.s32[0] = 0x6a09e667U;
690 		ctx->state.s32[1] = 0xbb67ae85U;
691 		ctx->state.s32[2] = 0x3c6ef372U;
692 		ctx->state.s32[3] = 0xa54ff53aU;
693 		ctx->state.s32[4] = 0x510e527fU;
694 		ctx->state.s32[5] = 0x9b05688cU;
695 		ctx->state.s32[6] = 0x1f83d9abU;
696 		ctx->state.s32[7] = 0x5be0cd19U;
697 		break;
698 	case SHA384_MECH_INFO_TYPE:
699 	case SHA384_HMAC_MECH_INFO_TYPE:
700 	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
701 		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
702 		ctx->state.s64[1] = 0x629a292a367cd507ULL;
703 		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
704 		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
705 		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
706 		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
707 		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
708 		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
709 		break;
710 	case SHA512_MECH_INFO_TYPE:
711 	case SHA512_HMAC_MECH_INFO_TYPE:
712 	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
713 		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
714 		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
715 		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
716 		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
717 		ctx->state.s64[4] = 0x510e527fade682d1ULL;
718 		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
719 		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
720 		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
721 		break;
722 	case SHA512_224_MECH_INFO_TYPE:
723 		ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
724 		ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
725 		ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
726 		ctx->state.s64[3] = 0x679DD514582F9FCFULL;
727 		ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
728 		ctx->state.s64[5] = 0x77E36F7304C48942ULL;
729 		ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
730 		ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
731 		break;
732 	case SHA512_256_MECH_INFO_TYPE:
733 		ctx->state.s64[0] = 0x22312194FC2BF72CULL;
734 		ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
735 		ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
736 		ctx->state.s64[3] = 0x963877195940EABDULL;
737 		ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
738 		ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
739 		ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
740 		ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
741 		break;
742 #ifdef _KERNEL
743 	default:
744 		cmn_err(CE_PANIC,
745 		    "sha2_init: failed to find a supported algorithm: 0x%x",
746 		    (uint32_t)mech);
747 
748 #endif /* _KERNEL */
749 	}
750 
751 	ctx->algotype = (uint32_t)mech;
752 	ctx->count.c64[0] = ctx->count.c64[1] = 0;
753 }
754 
755 #ifndef _KERNEL
756 
757 #pragma inline(SHA256Init, SHA384Init, SHA512Init)
758 void
759 SHA256Init(SHA256_CTX *ctx)
760 {
761 	SHA2Init(SHA256, ctx);
762 }
763 
764 void
765 SHA384Init(SHA384_CTX *ctx)
766 {
767 	SHA2Init(SHA384, ctx);
768 }
769 
770 void
771 SHA512Init(SHA512_CTX *ctx)
772 {
773 	SHA2Init(SHA512, ctx);
774 }
775 
776 #endif /* _KERNEL */
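
/*
 * A minimal userland usage sketch (assuming the SHA256_CTX typedef and a
 * SHA256_DIGEST_LENGTH constant from <sys/sha2.h>, with data/datalen
 * standing in for any caller-supplied buffer): one Init, any number of
 * Updates, then a single Final, which also zeroes the context:
 *
 *	SHA256_CTX ctx;
 *	uint8_t md[SHA256_DIGEST_LENGTH];
 *
 *	SHA256Init(&ctx);
 *	SHA256Update(&ctx, data, datalen);	(weak alias for SHA2Update)
 *	SHA256Final(md, &ctx);			(weak alias for SHA2Final)
 *
 * The SHA384 and SHA512 entry points follow the same pattern with their
 * own Init functions and digest lengths.
 */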
777 
778 /*
779  * SHA2Update()
780  *
781  * purpose: continues an sha2 digest operation, using the message block
782  *          to update the context.
783  *   input: SHA2_CTX *	: the context to update
784  *          void *	: the message block
785  *          size_t      : the length of the message block, in bytes
786  *  output: void
787  */
788 
789 void
790 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
791 {
792 	size_t		i, buf_index, buf_len, buf_limit;
793 	const uint8_t	*input = inptr;
794 	uint32_t	algotype = ctx->algotype;
795 #if defined(__amd64)
796 	size_t		block_count;
797 #endif	/* __amd64 */
798 
799 
800 	/* check for noop */
801 	if (input_len == 0)
802 		return;
803 
804 	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
805 		/*
806 		 * Extract low 32 bits of input_len; when we adjust
807 		 * count.c32[0] we must fold in the carry from the
808 		 * addition of the low bits along with the nonzero
809 		 * upper bits (if any) from input_len.
810 		 */
811 		uint32_t il = input_len & UINT32_MAX;
812 
813 		il = il << 3;
814 		buf_limit = 64;
815 
816 		/* compute number of bytes mod 64 */
817 		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
818 
819 		/* update number of bits */
820 		if ((ctx->count.c32[1] += il) < il)
821 			ctx->count.c32[0]++;
822 
823 		ctx->count.c32[0] += (input_len >> 29);
824 
825 	} else {
826 		uint64_t il = input_len;
827 
828 		il = il << 3;
829 		buf_limit = 128;
830 
831 		/* compute number of bytes mod 128 */
832 		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
833 
834 		/* update number of bits */
835 		if ((ctx->count.c64[1] += il) < il)
836 			ctx->count.c64[0]++;
837 
838 		ctx->count.c64[0] += ((uintmax_t)input_len >> 61);
839 	}
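
	/*
	 * To illustrate the bookkeeping above: for the SHA256 path, an
	 * update of 3 bytes adds 24 to count.c32[1] and nothing to
	 * count.c32[0], while an update of 0x20000000 bytes (2^32 bits)
	 * wraps the low word (il << 3 is 0), produces no carry, and adds
	 * input_len >> 29 == 1 to count.c32[0], for a total of exactly
	 * 2^32 bits.
	 */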
840 
841 	buf_len = buf_limit - buf_index;
842 
843 	/* transform as many times as possible */
844 	i = 0;
845 	if (input_len >= buf_len) {
846 
847 		/*
848 		 * general optimization:
849 		 *
850 		 * only do initial bcopy() and SHA2Transform() if
851 		 * buf_index != 0.  if buf_index == 0, we're just
852 		 * wasting our time doing the bcopy() since there
853 		 * wasn't any data left over from a previous call to
854 		 * SHA2Update().
855 		 */
856 		if (buf_index) {
857 			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
858 			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
859 				SHA256Transform(ctx, ctx->buf_un.buf8);
860 			else
861 				SHA512Transform(ctx, ctx->buf_un.buf8);
862 
863 			i = buf_len;
864 		}
865 
866 #if !defined(__amd64)
867 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
868 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
869 				SHA256Transform(ctx, &input[i]);
870 			}
871 		} else {
872 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
873 				SHA512Transform(ctx, &input[i]);
874 			}
875 		}
876 
877 #else
878 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
879 			block_count = (input_len - i) >> 6;
880 			if (block_count > 0) {
881 				SHA256TransformBlocks(ctx, &input[i],
882 				    block_count);
883 				i += block_count << 6;
884 			}
885 		} else {
886 			block_count = (input_len - i) >> 7;
887 			if (block_count > 0) {
888 				SHA512TransformBlocks(ctx, &input[i],
889 				    block_count);
890 				i += block_count << 7;
891 			}
892 		}
893 #endif	/* !__amd64 */
894 
895 		/*
896 		 * general optimization:
897 		 *
898 		 * if i and input_len are the same, return now instead
899 		 * of calling bcopy(), since the bcopy() in this case
900 		 * will be an expensive noop.
901 		 */
902 
903 		if (input_len == i)
904 			return;
905 
906 		buf_index = 0;
907 	}
908 
909 	/* buffer remaining input */
910 	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
911 }
912 
913 
914 /*
915  * SHA2Final()
916  *
917  * purpose: ends an sha2 digest operation, finalizing the message digest and
918  *          zeroing the context.
919  *   input: uchar_t *	: a buffer to store the digest
920  *			: The function actually uses void* because many
921  *			: callers pass things other than uchar_t here.
922  *          SHA2_CTX *  : the context to finalize, save, and zero
923  *  output: void
924  */
925 
926 void
927 SHA2Final(void *digest, SHA2_CTX *ctx)
928 {
929 	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
930 	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
931 	uint32_t	index;
932 	uint32_t	algotype = ctx->algotype;
933 
934 	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
935 		index  = (ctx->count.c32[1] >> 3) & 0x3f;
936 		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
937 		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
938 		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
939 		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
940 	} else {
941 		index  = (ctx->count.c64[1] >> 3) & 0x7f;
942 		Encode64(bitcount_be64, ctx->count.c64,
943 		    sizeof (bitcount_be64));
944 		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
945 		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
946 		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
947 			ctx->state.s64[6] = ctx->state.s64[7] = 0;
948 			Encode64(digest, ctx->state.s64,
949 			    sizeof (uint64_t) * 6);
950 		} else if (algotype == SHA512_224_MECH_INFO_TYPE) {
951 			uint8_t last[sizeof (uint64_t)];
952 			/*
953 			 * Since SHA-512/224 doesn't align well to 64-bit
954 			 * boundaries, we must do the encoding in three steps:
955 			 * 1) encode the three 64-bit words that fit neatly
956 			 * 2) encode the last 64-bit word to a temp buffer
957 			 * 3) chop out the lower 32-bits from the temp buffer
958 			 *    and append them to the digest
959 			 */
960 			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
961 			Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
962 			bcopy(last, (uint8_t *)digest + 24, 4);
963 		} else if (algotype == SHA512_256_MECH_INFO_TYPE) {
964 			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
965 		} else {
966 			Encode64(digest, ctx->state.s64,
967 			    sizeof (ctx->state.s64));
968 		}
969 	}
970 
971 	/* zeroize sensitive information */
972 	bzero(ctx, sizeof (*ctx));
973 }
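
/*
 * A worked padding example for the SHA256 path: after hashing a 3-byte
 * message, index is 3, so SHA2Final() appends 56 - 3 = 53 bytes of
 * PADDING (0x80 followed by zeros) and then the 8-byte big-endian bit
 * count (24), filling out exactly one 64-byte block before the state is
 * encoded into the digest.  Note that bitcount_be is captured before the
 * padding updates disturb ctx->count.
 */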
974