xref: /illumos-gate/usr/src/common/crypto/sha2/sha2.c (revision eb6b10e69fa5ba733da194d3ad71a0e63338be29)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 /*
6  * Copyright 2013 Saso Kiselkov.  All rights reserved.
7  */
8 
9 /*
10  * The basic framework for this code came from the reference
11  * implementation for MD5.  That implementation is Copyright (C)
12  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
13  *
14  * License to copy and use this software is granted provided that it
15  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
16  * Algorithm" in all material mentioning or referencing this software
17  * or this function.
18  *
19  * License is also granted to make and use derivative works provided
20  * that such works are identified as "derived from the RSA Data
21  * Security, Inc. MD5 Message-Digest Algorithm" in all material
22  * mentioning or referencing the derived work.
23  *
24  * RSA Data Security, Inc. makes no representations concerning either
25  * the merchantability of this software or the suitability of this
26  * software for any particular purpose. It is provided "as is"
27  * without express or implied warranty of any kind.
28  *
29  * These notices must be retained in any copies of any part of this
30  * documentation and/or software.
31  *
32  * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
33  * standard, available at
34  * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
35  * Not as fast as one would like -- further optimizations are encouraged
36  * and appreciated.
37  */
38 
39 #ifndef _KERNEL
40 #include <stdint.h>
41 #include <strings.h>
42 #include <stdlib.h>
43 #include <errno.h>
44 #endif /* _KERNEL */
45 
46 #include <sys/types.h>
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/sysmacros.h>
50 #define	_SHA2_IMPL
51 #include <sys/sha2.h>
52 #include <sys/sha2_consts.h>
53 
54 #ifdef _KERNEL
55 #include <sys/cmn_err.h>
56 
57 #else
58 #pragma weak SHA256Update = SHA2Update
59 #pragma weak SHA384Update = SHA2Update
60 #pragma weak SHA512Update = SHA2Update
61 
62 #pragma weak SHA256Final = SHA2Final
63 #pragma weak SHA384Final = SHA2Final
64 #pragma weak SHA512Final = SHA2Final
65 
66 #endif	/* _KERNEL */
67 
68 #ifdef _LITTLE_ENDIAN
69 #include <sys/byteorder.h>
70 #define	HAVE_HTONL
71 #endif
72 
/*
 * File-local helpers that serialise 32-bit / 64-bit words into a
 * big endian byte stream.  Both are defined later in this file.
 */
73 static void Encode(uint8_t *, uint32_t *, size_t);
74 static void Encode64(uint8_t *, uint64_t *, size_t);
75 
/*
 * Single-block transform entry points.
 *
 * When __amd64 is defined, SHA256Transform()/SHA512Transform() are macros
 * that forward to the external multi-block routines with a block count
 * of 1.  Those routines are only declared here; their definitions are not
 * in this file (presumably hand-written assembly -- TODO confirm).
 *
 * Otherwise the transforms are the static C functions defined below.
 */
76 #if	defined(__amd64)
77 #define	SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
78 #define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
79 
80 void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
81 void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
82 
83 #else
84 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
85 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
86 #endif	/* __amd64 */
87 
/*
 * 128-byte pad buffer: the first byte is 0x80 and the remaining 127 bytes
 * are zero-initialised.  Its consumer is not visible in this part of the
 * file (presumably the finalisation code appends it as message padding --
 * verify against SHA2Final).
 */
88 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
89 
/*
 * Ch and Maj are the basic SHA2 functions.
 *
 * Ch selects, bit by bit, c where b is set and d where b is clear.
 * Maj yields, bit by bit, the majority value of b, c and d.
 * Every argument is fully parenthesised so that compound expressions
 * (e.g. Ch(x | y, c, d)) expand correctly.
 */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/*
 * Rotates x right n bits.  The width is taken from sizeof (x), so the
 * same macro serves both the 32-bit and the 64-bit variants; n must be
 * strictly between 0 and the width of x.
 */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

/*
 * One SHA256 round.  a..h must be modifiable lvalues; only d and h are
 * written.  T1 and T2 must be declared by the caller as scratch variables.
 * i is handed unchanged to SHA256_CONST(), and w is the message schedule
 * word for this round.  Wrapped in do/while (0) so that the expansion is
 * a single statement.
 */
#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) +		\
		    SHA256_CONST(i) + (w);				\
		d += T1;						\
		T2 = BIGSIGMA0_256(a) + Maj(a, b, c);			\
		h = T1 + T2;						\
	} while (0)

/* SHA384/512 Functions */
#define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
#define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))

/*
 * One SHA384/512 round; same contract as SHA256ROUND, with the 64-bit
 * sigma functions and SHA512_CONST().
 */
#define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = h + BIGSIGMA1(e) + Ch(e, f, g) +			\
		    SHA512_CONST(i) + (w);				\
		d += T1;						\
		T2 = BIGSIGMA0(a) + Maj(a, b, c);			\
		h = T1 + T2;						\
	} while (0)
123 
124 /*
125  * sparc optimization:
126  *
127  * on the sparc, we can load big endian 32-bit data easily.  note that
128  * special care must be taken to ensure the address is 32-bit aligned.
129  * in the interest of speed, we don't check to make sure, since
130  * careful programming can guarantee this for us.
131  */
132 
/*
 * LOAD_BIG_32(addr) / LOAD_BIG_64(addr): fetch a big endian 32-bit or
 * 64-bit quantity starting at byte address addr and yield it in host
 * order.  Three implementations are selected at compile time.
 */
#if	defined(_BIG_ENDIAN)
/* host order is already big endian: read the word in place */
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#elif	defined(HAVE_HTONL)
/* little endian host with byteorder helpers: read the word, then swap */
#define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
#define	LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))

#else
/*
 * little endian -- will work on big endian, but slowly
 *
 * addr must point at bytes.  Each byte is widened to the result type
 * before it is shifted: shifting the int-promoted byte left by 24 would
 * overflow a signed int for values >= 0x80 and would make the macro
 * yield a (possibly negative) int instead of a uint32_t.
 */
#define	LOAD_BIG_32(addr)	\
	(((uint32_t)(addr)[0] << 24) | ((uint32_t)(addr)[1] << 16) |	\
	    ((uint32_t)(addr)[2] << 8) | (uint32_t)(addr)[3])
#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _BIG_ENDIAN */
151 
152 
153 #if	!defined(__amd64)
154 /* SHA256 Transform */
155 
/*
 * SHA256Transform()
 *
 * purpose: folds one 64-byte message block into the 32-bit hash state
 *          held in ctx->state.s32[0..7] (64 rounds, fully unrolled).
 *   input: SHA2_CTX *	: context whose state.s32[] is read and updated
 *          uint8_t *	: the 64-byte block, read as sixteen big endian
 *			  32-bit words
 *  output: void
 *
 * Only compiled when __amd64 is not defined.
 */
156 static void
157 SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
158 {
	/* working copies of the eight state words */
159 	uint32_t a = ctx->state.s32[0];
160 	uint32_t b = ctx->state.s32[1];
161 	uint32_t c = ctx->state.s32[2];
162 	uint32_t d = ctx->state.s32[3];
163 	uint32_t e = ctx->state.s32[4];
164 	uint32_t f = ctx->state.s32[5];
165 	uint32_t g = ctx->state.s32[6];
166 	uint32_t h = ctx->state.s32[7];
167 
	/*
	 * w0..w15 hold a rolling 16-word window of the message schedule;
	 * T1/T2 are the scratch variables the SHA256ROUND macro writes.
	 */
168 	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
169 	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
170 	uint32_t T1, T2;
171 
	/*
	 * Round constant table, built on sparc only.  It is not referenced
	 * by name in this function; presumably SHA256_CONST() (defined in
	 * sys/sha2_consts.h, not visible here) indexes it on sparc --
	 * TODO confirm.
	 */
172 #if	defined(__sparc)
173 	static const uint32_t sha256_consts[] = {
174 		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
175 		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
176 		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
177 		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
178 		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
179 		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
180 		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
181 		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
182 		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
183 		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
184 		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
185 		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
186 		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
187 		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
188 		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
189 		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
190 		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
191 		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
192 		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
193 		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
194 		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
195 		SHA256_CONST_63
196 	};
197 #endif	/* __sparc */
198 
	/*
	 * The word loads below need a 4-byte aligned source.  If the
	 * caller's block is not aligned, copy it into the context's own
	 * buffer and read from there; this overwrites ctx->buf_un.
	 */
199 	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
200 		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
201 		blk = (uint8_t *)ctx->buf_un.buf32;
202 	}
203 
	/*
	 * Rounds 0-15: each schedule word is loaded straight from the
	 * block.  Instead of shuffling a..h after every round, the macro
	 * arguments are rotated by one position per round, so the names
	 * line up again after every eighth round.
	 */
204 	/* LINTED E_BAD_PTR_CAST_ALIGN */
205 	w0 =  LOAD_BIG_32(blk + 4 * 0);
206 	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
207 	/* LINTED E_BAD_PTR_CAST_ALIGN */
208 	w1 =  LOAD_BIG_32(blk + 4 * 1);
209 	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
210 	/* LINTED E_BAD_PTR_CAST_ALIGN */
211 	w2 =  LOAD_BIG_32(blk + 4 * 2);
212 	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
213 	/* LINTED E_BAD_PTR_CAST_ALIGN */
214 	w3 =  LOAD_BIG_32(blk + 4 * 3);
215 	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
216 	/* LINTED E_BAD_PTR_CAST_ALIGN */
217 	w4 =  LOAD_BIG_32(blk + 4 * 4);
218 	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
219 	/* LINTED E_BAD_PTR_CAST_ALIGN */
220 	w5 =  LOAD_BIG_32(blk + 4 * 5);
221 	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
222 	/* LINTED E_BAD_PTR_CAST_ALIGN */
223 	w6 =  LOAD_BIG_32(blk + 4 * 6);
224 	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
225 	/* LINTED E_BAD_PTR_CAST_ALIGN */
226 	w7 =  LOAD_BIG_32(blk + 4 * 7);
227 	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
228 	/* LINTED E_BAD_PTR_CAST_ALIGN */
229 	w8 =  LOAD_BIG_32(blk + 4 * 8);
230 	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
231 	/* LINTED E_BAD_PTR_CAST_ALIGN */
232 	w9 =  LOAD_BIG_32(blk + 4 * 9);
233 	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
234 	/* LINTED E_BAD_PTR_CAST_ALIGN */
235 	w10 =  LOAD_BIG_32(blk + 4 * 10);
236 	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
237 	/* LINTED E_BAD_PTR_CAST_ALIGN */
238 	w11 =  LOAD_BIG_32(blk + 4 * 11);
239 	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
240 	/* LINTED E_BAD_PTR_CAST_ALIGN */
241 	w12 =  LOAD_BIG_32(blk + 4 * 12);
242 	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
243 	/* LINTED E_BAD_PTR_CAST_ALIGN */
244 	w13 =  LOAD_BIG_32(blk + 4 * 13);
245 	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
246 	/* LINTED E_BAD_PTR_CAST_ALIGN */
247 	w14 =  LOAD_BIG_32(blk + 4 * 14);
248 	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
249 	/* LINTED E_BAD_PTR_CAST_ALIGN */
250 	w15 =  LOAD_BIG_32(blk + 4 * 15);
251 	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
252 
	/*
	 * Rounds 16-63: wN is overwritten in place with the schedule word
	 * for the current round, computed from the words 2, 7, 15 and 16
	 * rounds back, which are still held in the window.
	 */
253 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
254 	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
255 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
256 	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
257 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
258 	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
259 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
260 	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
261 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
262 	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
263 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
264 	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
265 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
266 	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
267 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
268 	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
269 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
270 	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
271 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
272 	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
273 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
274 	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
275 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
276 	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
277 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
278 	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
279 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
280 	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
281 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
282 	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
283 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
284 	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
285 
286 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
287 	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
288 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
289 	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
290 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
291 	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
292 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
293 	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
294 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
295 	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
296 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
297 	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
298 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
299 	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
300 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
301 	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
302 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
303 	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
304 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
305 	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
306 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
307 	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
308 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
309 	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
310 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
311 	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
312 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
313 	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
314 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
315 	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
316 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
317 	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
318 
319 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
320 	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
321 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
322 	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
323 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
324 	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
325 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
326 	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
327 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
328 	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
329 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
330 	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
331 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
332 	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
333 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
334 	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
335 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
336 	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
337 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
338 	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
339 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
340 	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
341 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
342 	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
343 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
344 	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
345 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
346 	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
347 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
348 	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
349 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
350 	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
351 
	/* add the working variables back into the running hash state */
352 	ctx->state.s32[0] += a;
353 	ctx->state.s32[1] += b;
354 	ctx->state.s32[2] += c;
355 	ctx->state.s32[3] += d;
356 	ctx->state.s32[4] += e;
357 	ctx->state.s32[5] += f;
358 	ctx->state.s32[6] += g;
359 	ctx->state.s32[7] += h;
360 }
361 
362 
363 /* SHA384 and SHA512 Transform */
364 
/*
 * SHA512Transform()
 *
 * purpose: folds one 128-byte message block into the 64-bit hash state
 *          held in ctx->state.s64[0..7] (80 rounds, fully unrolled).
 *   input: SHA2_CTX *	: context whose state.s64[] is read and updated
 *          uint8_t *	: the 128-byte block, read as sixteen big endian
 *			  64-bit words
 *  output: void
 *
 * Only compiled when __amd64 is not defined.
 */
365 static void
366 SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
367 {
368 
	/* working copies of the eight state words */
369 	uint64_t a = ctx->state.s64[0];
370 	uint64_t b = ctx->state.s64[1];
371 	uint64_t c = ctx->state.s64[2];
372 	uint64_t d = ctx->state.s64[3];
373 	uint64_t e = ctx->state.s64[4];
374 	uint64_t f = ctx->state.s64[5];
375 	uint64_t g = ctx->state.s64[6];
376 	uint64_t h = ctx->state.s64[7];
377 
	/*
	 * w0..w15 hold a rolling 16-word window of the message schedule;
	 * T1/T2 are the scratch variables the SHA512ROUND macro writes.
	 */
378 	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
379 	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
380 	uint64_t T1, T2;
381 
	/*
	 * Round constant table, built on sparc only.  It is not referenced
	 * by name in this function; presumably SHA512_CONST() (defined in
	 * sys/sha2_consts.h, not visible here) indexes it on sparc --
	 * TODO confirm.
	 */
382 #if	defined(__sparc)
383 	static const uint64_t sha512_consts[] = {
384 		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
385 		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
386 		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
387 		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
388 		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
389 		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
390 		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
391 		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
392 		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
393 		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
394 		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
395 		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
396 		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
397 		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
398 		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
399 		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
400 		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
401 		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
402 		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
403 		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
404 		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
405 		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
406 		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
407 		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
408 		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
409 		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
410 		SHA512_CONST_78, SHA512_CONST_79
411 	};
412 #endif	/* __sparc */
413 
414 
	/*
	 * The word loads below need an 8-byte aligned source.  If the
	 * caller's block is not aligned, copy it into the context's own
	 * buffer and read from there; this overwrites ctx->buf_un.
	 */
415 	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
416 		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
417 		blk = (uint8_t *)ctx->buf_un.buf64;
418 	}
419 
	/*
	 * Rounds 0-15: each schedule word is loaded straight from the
	 * block.  Instead of shuffling a..h after every round, the macro
	 * arguments are rotated by one position per round, so the names
	 * line up again after every eighth round.
	 */
420 	/* LINTED E_BAD_PTR_CAST_ALIGN */
421 	w0 =  LOAD_BIG_64(blk + 8 * 0);
422 	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
423 	/* LINTED E_BAD_PTR_CAST_ALIGN */
424 	w1 =  LOAD_BIG_64(blk + 8 * 1);
425 	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
426 	/* LINTED E_BAD_PTR_CAST_ALIGN */
427 	w2 =  LOAD_BIG_64(blk + 8 * 2);
428 	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
429 	/* LINTED E_BAD_PTR_CAST_ALIGN */
430 	w3 =  LOAD_BIG_64(blk + 8 * 3);
431 	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
432 	/* LINTED E_BAD_PTR_CAST_ALIGN */
433 	w4 =  LOAD_BIG_64(blk + 8 * 4);
434 	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
435 	/* LINTED E_BAD_PTR_CAST_ALIGN */
436 	w5 =  LOAD_BIG_64(blk + 8 * 5);
437 	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
438 	/* LINTED E_BAD_PTR_CAST_ALIGN */
439 	w6 =  LOAD_BIG_64(blk + 8 * 6);
440 	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
441 	/* LINTED E_BAD_PTR_CAST_ALIGN */
442 	w7 =  LOAD_BIG_64(blk + 8 * 7);
443 	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
444 	/* LINTED E_BAD_PTR_CAST_ALIGN */
445 	w8 =  LOAD_BIG_64(blk + 8 * 8);
446 	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
447 	/* LINTED E_BAD_PTR_CAST_ALIGN */
448 	w9 =  LOAD_BIG_64(blk + 8 * 9);
449 	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
450 	/* LINTED E_BAD_PTR_CAST_ALIGN */
451 	w10 =  LOAD_BIG_64(blk + 8 * 10);
452 	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
453 	/* LINTED E_BAD_PTR_CAST_ALIGN */
454 	w11 =  LOAD_BIG_64(blk + 8 * 11);
455 	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
456 	/* LINTED E_BAD_PTR_CAST_ALIGN */
457 	w12 =  LOAD_BIG_64(blk + 8 * 12);
458 	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
459 	/* LINTED E_BAD_PTR_CAST_ALIGN */
460 	w13 =  LOAD_BIG_64(blk + 8 * 13);
461 	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
462 	/* LINTED E_BAD_PTR_CAST_ALIGN */
463 	w14 =  LOAD_BIG_64(blk + 8 * 14);
464 	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
465 	/* LINTED E_BAD_PTR_CAST_ALIGN */
466 	w15 =  LOAD_BIG_64(blk + 8 * 15);
467 	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
468 
	/*
	 * Rounds 16-79: wN is overwritten in place with the schedule word
	 * for the current round, computed from the words 2, 7, 15 and 16
	 * rounds back, which are still held in the window.
	 */
469 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
470 	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
471 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
472 	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
473 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
474 	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
475 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
476 	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
477 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
478 	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
479 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
480 	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
481 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
482 	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
483 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
484 	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
485 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
486 	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
487 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
488 	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
489 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
490 	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
491 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
492 	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
493 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
494 	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
495 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
496 	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
497 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
498 	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
499 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
500 	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
501 
502 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
503 	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
504 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
505 	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
506 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
507 	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
508 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
509 	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
510 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
511 	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
512 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
513 	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
514 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
515 	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
516 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
517 	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
518 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
519 	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
520 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
521 	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
522 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
523 	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
524 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
525 	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
526 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
527 	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
528 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
529 	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
530 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
531 	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
532 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
533 	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
534 
535 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
536 	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
537 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
538 	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
539 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
540 	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
541 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
542 	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
543 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
544 	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
545 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
546 	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
547 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
548 	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
549 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
550 	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
551 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
552 	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
553 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
554 	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
555 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
556 	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
557 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
558 	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
559 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
560 	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
561 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
562 	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
563 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
564 	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
565 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
566 	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
567 
568 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
569 	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
570 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
571 	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
572 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
573 	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
574 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
575 	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
576 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
577 	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
578 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
579 	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
580 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
581 	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
582 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
583 	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
584 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
585 	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
586 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
587 	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
588 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
589 	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
590 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
591 	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
592 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
593 	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
594 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
595 	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
596 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
597 	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
598 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
599 	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
600 
	/* add the working variables back into the running hash state */
601 	ctx->state.s64[0] += a;
602 	ctx->state.s64[1] += b;
603 	ctx->state.s64[2] += c;
604 	ctx->state.s64[3] += d;
605 	ctx->state.s64[4] += e;
606 	ctx->state.s64[5] += f;
607 	ctx->state.s64[6] += g;
608 	ctx->state.s64[7] += h;
609 
610 }
611 #endif	/* !__amd64 */
612 
613 
614 /*
615  * Encode()
616  *
617  * purpose: to convert a list of numbers from little endian to big endian
618  *   input: uint8_t *	: place to store the converted big endian numbers
619  *	    uint32_t *	: place to get numbers to convert from
620  *          size_t	: the length of the input in bytes
621  *  output: void
622  */
623 
624 static void
625 Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
626     size_t len)
627 {
628 	size_t		i, j;
629 
630 #if	defined(__sparc)
631 	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
632 		for (i = 0, j = 0; j < len; i++, j += 4) {
633 			/* LINTED E_BAD_PTR_CAST_ALIGN */
634 			*((uint32_t *)(output + j)) = input[i];
635 		}
636 	} else {
637 #endif	/* little endian -- will work on big endian, but slowly */
638 		for (i = 0, j = 0; j < len; i++, j += 4) {
639 			output[j]	= (input[i] >> 24) & 0xff;
640 			output[j + 1]	= (input[i] >> 16) & 0xff;
641 			output[j + 2]	= (input[i] >>  8) & 0xff;
642 			output[j + 3]	= input[i] & 0xff;
643 		}
644 #if	defined(__sparc)
645 	}
646 #endif
647 }
648 
649 static void
650 Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
651     size_t len)
652 {
653 	size_t		i, j;
654 
655 #if	defined(__sparc)
656 	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
657 		for (i = 0, j = 0; j < len; i++, j += 8) {
658 			/* LINTED E_BAD_PTR_CAST_ALIGN */
659 			*((uint64_t *)(output + j)) = input[i];
660 		}
661 	} else {
662 #endif	/* little endian -- will work on big endian, but slowly */
663 		for (i = 0, j = 0; j < len; i++, j += 8) {
664 
665 			output[j]	= (input[i] >> 56) & 0xff;
666 			output[j + 1]	= (input[i] >> 48) & 0xff;
667 			output[j + 2]	= (input[i] >> 40) & 0xff;
668 			output[j + 3]	= (input[i] >> 32) & 0xff;
669 			output[j + 4]	= (input[i] >> 24) & 0xff;
670 			output[j + 5]	= (input[i] >> 16) & 0xff;
671 			output[j + 6]	= (input[i] >>  8) & 0xff;
672 			output[j + 7]	= input[i] & 0xff;
673 		}
674 #if	defined(__sparc)
675 	}
676 #endif
677 }
678 
679 
680 void
681 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
682 {
683 
684 	switch (mech) {
685 	case SHA256_MECH_INFO_TYPE:
686 	case SHA256_HMAC_MECH_INFO_TYPE:
687 	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
688 		ctx->state.s32[0] = 0x6a09e667U;
689 		ctx->state.s32[1] = 0xbb67ae85U;
690 		ctx->state.s32[2] = 0x3c6ef372U;
691 		ctx->state.s32[3] = 0xa54ff53aU;
692 		ctx->state.s32[4] = 0x510e527fU;
693 		ctx->state.s32[5] = 0x9b05688cU;
694 		ctx->state.s32[6] = 0x1f83d9abU;
695 		ctx->state.s32[7] = 0x5be0cd19U;
696 		break;
697 	case SHA384_MECH_INFO_TYPE:
698 	case SHA384_HMAC_MECH_INFO_TYPE:
699 	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
700 		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
701 		ctx->state.s64[1] = 0x629a292a367cd507ULL;
702 		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
703 		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
704 		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
705 		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
706 		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
707 		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
708 		break;
709 	case SHA512_MECH_INFO_TYPE:
710 	case SHA512_HMAC_MECH_INFO_TYPE:
711 	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
712 		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
713 		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
714 		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
715 		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
716 		ctx->state.s64[4] = 0x510e527fade682d1ULL;
717 		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
718 		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
719 		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
720 		break;
721 	case SHA512_224_MECH_INFO_TYPE:
722 		ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
723 		ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
724 		ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
725 		ctx->state.s64[3] = 0x679DD514582F9FCFULL;
726 		ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
727 		ctx->state.s64[5] = 0x77E36F7304C48942ULL;
728 		ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
729 		ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
730 		break;
731 	case SHA512_256_MECH_INFO_TYPE:
732 		ctx->state.s64[0] = 0x22312194FC2BF72CULL;
733 		ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
734 		ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
735 		ctx->state.s64[3] = 0x963877195940EABDULL;
736 		ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
737 		ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
738 		ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
739 		ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
740 		break;
741 #ifdef _KERNEL
742 	default:
743 		cmn_err(CE_PANIC,
744 		    "sha2_init: failed to find a supported algorithm: 0x%x",
745 		    (uint32_t)mech);
746 
747 #endif /* _KERNEL */
748 	}
749 
750 	ctx->algotype = (uint32_t)mech;
751 	ctx->count.c64[0] = ctx->count.c64[1] = 0;
752 }
753 
754 #ifndef _KERNEL
755 
/*
 * SHA256Init(), SHA384Init(), SHA512Init()
 *
 * Convenience initializers, built only when _KERNEL is not defined.
 * Each one forwards to SHA2Init() with the matching mechanism constant
 * (SHA256, SHA384 or SHA512, defined outside this file) and the caller's
 * context.  The pragma names all three for inlining.
 */
756 #pragma inline(SHA256Init, SHA384Init, SHA512Init)
757 void
758 SHA256Init(SHA256_CTX *ctx)
759 {
760 	SHA2Init(SHA256, ctx);
761 }
762 
763 void
764 SHA384Init(SHA384_CTX *ctx)
765 {
766 	SHA2Init(SHA384, ctx);
767 }
768 
769 void
770 SHA512Init(SHA512_CTX *ctx)
771 {
772 	SHA2Init(SHA512, ctx);
773 }
774 
775 #endif /* _KERNEL */
776 
777 /*
778  * SHA2Update()
779  *
780  * purpose: continues an sha2 digest operation, using the message block
781  *          to update the context.
782  *   input: SHA2_CTX *	: the context to update
783  *          void *	: the message block
784  *          size_t      : the length of the message block, in bytes
785  *  output: void
786  */
787 
788 void
789 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
790 {
791 	uint32_t	i, buf_index, buf_len, buf_limit;
792 	const uint8_t	*input = inptr;
793 	uint32_t	algotype = ctx->algotype;
794 #if defined(__amd64)
795 	uint32_t	block_count;
796 #endif	/* !__amd64 */
797 
798 
799 	/* check for noop */
800 	if (input_len == 0)
801 		return;
802 
803 	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
804 		buf_limit = 64;
805 
806 		/* compute number of bytes mod 64 */
807 		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
808 
809 		/* update number of bits */
810 		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
811 			ctx->count.c32[0]++;
812 
813 		ctx->count.c32[0] += (input_len >> 29);
814 
815 	} else {
816 		buf_limit = 128;
817 
818 		/* compute number of bytes mod 128 */
819 		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
820 
821 		/* update number of bits */
822 		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
823 			ctx->count.c64[0]++;
824 
825 		ctx->count.c64[0] += (input_len >> 29);
826 	}
827 
828 	buf_len = buf_limit - buf_index;
829 
830 	/* transform as many times as possible */
831 	i = 0;
832 	if (input_len >= buf_len) {
833 
834 		/*
835 		 * general optimization:
836 		 *
837 		 * only do initial bcopy() and SHA2Transform() if
838 		 * buf_index != 0.  if buf_index == 0, we're just
839 		 * wasting our time doing the bcopy() since there
840 		 * wasn't any data left over from a previous call to
841 		 * SHA2Update().
842 		 */
843 		if (buf_index) {
844 			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
845 			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
846 				SHA256Transform(ctx, ctx->buf_un.buf8);
847 			else
848 				SHA512Transform(ctx, ctx->buf_un.buf8);
849 
850 			i = buf_len;
851 		}
852 
853 #if !defined(__amd64)
854 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
855 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
856 				SHA256Transform(ctx, &input[i]);
857 			}
858 		} else {
859 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
860 				SHA512Transform(ctx, &input[i]);
861 			}
862 		}
863 
864 #else
865 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
866 			block_count = (input_len - i) >> 6;
867 			if (block_count > 0) {
868 				SHA256TransformBlocks(ctx, &input[i],
869 				    block_count);
870 				i += block_count << 6;
871 			}
872 		} else {
873 			block_count = (input_len - i) >> 7;
874 			if (block_count > 0) {
875 				SHA512TransformBlocks(ctx, &input[i],
876 				    block_count);
877 				i += block_count << 7;
878 			}
879 		}
880 #endif	/* !__amd64 */
881 
882 		/*
883 		 * general optimization:
884 		 *
885 		 * if i and input_len are the same, return now instead
886 		 * of calling bcopy(), since the bcopy() in this case
887 		 * will be an expensive noop.
888 		 */
889 
890 		if (input_len == i)
891 			return;
892 
893 		buf_index = 0;
894 	}
895 
896 	/* buffer remaining input */
897 	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
898 }
899 
900 
901 /*
902  * SHA2Final()
903  *
904  * purpose: ends an sha2 digest operation, finalizing the message digest and
905  *          zeroing the context.
906  *   input: uchar_t *	: a buffer to store the digest
907  *			: The function actually uses void* because many
908  *			: callers pass things other than uchar_t here.
909  *          SHA2_CTX *  : the context to finalize, save, and zero
910  *  output: void
911  */
912 
913 void
914 SHA2Final(void *digest, SHA2_CTX *ctx)
915 {
916 	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
917 	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
918 	uint32_t	index;
919 	uint32_t	algotype = ctx->algotype;
920 
921 	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
922 		index  = (ctx->count.c32[1] >> 3) & 0x3f;
923 		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
924 		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
925 		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
926 		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
927 	} else {
928 		index  = (ctx->count.c64[1] >> 3) & 0x7f;
929 		Encode64(bitcount_be64, ctx->count.c64,
930 		    sizeof (bitcount_be64));
931 		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
932 		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
933 		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
934 			ctx->state.s64[6] = ctx->state.s64[7] = 0;
935 			Encode64(digest, ctx->state.s64,
936 			    sizeof (uint64_t) * 6);
937 		} else if (algotype == SHA512_224_MECH_INFO_TYPE) {
938 			uint8_t last[sizeof (uint64_t)];
939 			/*
940 			 * Since SHA-512/224 doesn't align well to 64-bit
941 			 * boundaries, we must do the encoding in three steps:
942 			 * 1) encode the three 64-bit words that fit neatly
943 			 * 2) encode the last 64-bit word to a temp buffer
944 			 * 3) chop out the lower 32-bits from the temp buffer
945 			 *    and append them to the digest
946 			 */
947 			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
948 			Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
949 			bcopy(last, (uint8_t *)digest + 24, 4);
950 		} else if (algotype == SHA512_256_MECH_INFO_TYPE) {
951 			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
952 		} else {
953 			Encode64(digest, ctx->state.s64,
954 			    sizeof (ctx->state.s64));
955 		}
956 	}
957 
958 	/* zeroize sensitive information */
959 	bzero(ctx, sizeof (*ctx));
960 }
961