xref: /illumos-gate/usr/src/common/crypto/sha2/sha2.c (revision fb2a9bae0030340ad72b9c26ba1ffee2ee3cafec)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * The basic framework for this code came from the reference
8  * implementation for MD5.  That implementation is Copyright (C)
9  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
10  *
11  * License to copy and use this software is granted provided that it
12  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
13  * Algorithm" in all material mentioning or referencing this software
14  * or this function.
15  *
16  * License is also granted to make and use derivative works provided
17  * that such works are identified as "derived from the RSA Data
18  * Security, Inc. MD5 Message-Digest Algorithm" in all material
19  * mentioning or referencing the derived work.
20  *
21  * RSA Data Security, Inc. makes no representations concerning either
22  * the merchantability of this software or the suitability of this
23  * software for any particular purpose. It is provided "as is"
24  * without express or implied warranty of any kind.
25  *
26  * These notices must be retained in any copies of any part of this
27  * documentation and/or software.
28  *
29  * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
30  * standard, available at
31  * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
32  * Not as fast as one would like -- further optimizations are encouraged
33  * and appreciated.
34  */
35 
36 #ifndef _KERNEL
37 #include <stdint.h>
38 #include <strings.h>
39 #include <stdlib.h>
40 #include <errno.h>
41 #endif /* _KERNEL */
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/sysmacros.h>
47 #define	_SHA2_IMPL
48 #include <sys/sha2.h>
49 #include <sys/sha2_consts.h>
50 
51 #ifdef _KERNEL
52 #include <sys/cmn_err.h>
53 
54 #else
55 #pragma weak SHA256Update = SHA2Update
56 #pragma weak SHA384Update = SHA2Update
57 #pragma weak SHA512Update = SHA2Update
58 
59 #pragma weak SHA256Final = SHA2Final
60 #pragma weak SHA384Final = SHA2Final
61 #pragma weak SHA512Final = SHA2Final
62 
63 #endif	/* _KERNEL */
64 
65 #ifdef _LITTLE_ENDIAN
66 #include <sys/byteorder.h>
67 #define	HAVE_HTONL
68 #endif
69 
70 static void Encode(uint8_t *, uint32_t *, size_t);
71 static void Encode64(uint8_t *, uint64_t *, size_t);
72 
73 #if	defined(__amd64)
74 #define	SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
75 #define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
76 
77 void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
78 void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
79 
80 #else
81 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
82 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
83 #endif	/* __amd64 */
84 
85 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
86 
87 /* Ch and Maj are the basic SHA2 functions. */
88 #define	Ch(b, c, d)	(((b) & (c)) ^ ((~b) & (d)))
89 #define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
90 
91 /* Rotates x right n bits. */
92 #define	ROTR(x, n)	\
93 	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
94 
95 /* Shift x right n bits */
96 #define	SHR(x, n)	((x) >> (n))
97 
98 /* SHA256 Functions */
99 #define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
100 #define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
101 #define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
102 #define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
103 
104 #define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
105 	T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;	\
106 	d += T1;							\
107 	T2 = BIGSIGMA0_256(a) + Maj(a, b, c);				\
108 	h = T1 + T2
109 
110 /* SHA384/512 Functions */
111 #define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
112 #define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
113 #define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
114 #define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
115 #define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
116 	T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;	\
117 	d += T1;							\
118 	T2 = BIGSIGMA0(a) + Maj(a, b, c);				\
119 	h = T1 + T2
120 
121 /*
122  * sparc optimization:
123  *
124  * on the sparc, we can load big endian 32-bit data easily.  note that
125  * special care must be taken to ensure the address is 32-bit aligned.
126  * in the interest of speed, we don't check to make sure, since
127  * careful programming can guarantee this for us.
128  */
129 
130 #if	defined(_BIG_ENDIAN)
131 #define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
132 #define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))
133 
134 #elif	defined(HAVE_HTONL)
135 #define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
136 #define	LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
137 
138 #else
139 /* little endian -- will work on big endian, but slowly */
140 #define	LOAD_BIG_32(addr)	\
141 	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
142 #define	LOAD_BIG_64(addr)	\
143 	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
144 	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
145 	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
146 	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
147 #endif	/* _BIG_ENDIAN */
148 
149 
150 #if	!defined(__amd64)
151 /* SHA256 Transform */
152 
/*
 * SHA256Transform()
 *
 * purpose: runs the SHA-256 compression function (FIPS 180-2 sec. 6.2.2)
 *          over exactly one 64-byte message block, folding the result
 *          into ctx->state.s32[0..7].
 *   input: SHA2_CTX *	: context holding the running hash state
 *          const uint8_t * : one 64-byte block of message data
 *  output: void (state is updated in place)
 *
 * All 64 rounds are fully unrolled.  Rather than rotating the eight
 * working variables after each round, successive SHA256ROUND invocations
 * permute the (a..h) argument order, which has the same effect.
 */
static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	/* the eight working variables, loaded from the current state */
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	/* 16-word rolling window of the message schedule */
	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint32_t T1, T2;

#if	defined(__sparc)
	/*
	 * Round-constant table K[0..63]; presumably referenced by the
	 * sparc definition of SHA256_CONST() in sys/sha2_consts.h --
	 * TODO confirm against that header.
	 */
	static const uint32_t sha256_consts[] = {
		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
		SHA256_CONST_63
	};
#endif	/* __sparc */

	/*
	 * LOAD_BIG_32 may perform whole 32-bit loads; copy misaligned
	 * input into the context's aligned scratch buffer first.
	 */
	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* rounds 0-15: schedule words come straight from the block */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/*
	 * rounds 16-63: expand the schedule in place,
	 * w[t] = SIGMA1(w[t-2]) + w[t-7] + SIGMA0(w[t-15]) + w[t-16],
	 * keeping only the most recent 16 words (indices mod 16).
	 */
	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	/* add the compressed block back into the running hash state */
	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}
358 
359 
360 /* SHA384 and SHA512 Transform */
361 
/*
 * SHA512Transform()
 *
 * purpose: runs the SHA-512 compression function (FIPS 180-2 sec. 6.3.2,
 *          shared by SHA-384) over exactly one 128-byte message block,
 *          folding the result into ctx->state.s64[0..7].
 *   input: SHA2_CTX *	: context holding the running hash state
 *          const uint8_t * : one 128-byte block of message data
 *  output: void (state is updated in place)
 *
 * All 80 rounds are fully unrolled; as in SHA256Transform(), the (a..h)
 * argument order of successive SHA512ROUND calls is permuted instead of
 * rotating the working variables.
 */
static void
SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
{

	/* the eight 64-bit working variables, loaded from the state */
	uint64_t a = ctx->state.s64[0];
	uint64_t b = ctx->state.s64[1];
	uint64_t c = ctx->state.s64[2];
	uint64_t d = ctx->state.s64[3];
	uint64_t e = ctx->state.s64[4];
	uint64_t f = ctx->state.s64[5];
	uint64_t g = ctx->state.s64[6];
	uint64_t h = ctx->state.s64[7];

	/* 16-word rolling window of the message schedule */
	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint64_t T1, T2;

#if	defined(__sparc)
	/*
	 * Round-constant table K[0..79]; presumably referenced by the
	 * sparc definition of SHA512_CONST() in sys/sha2_consts.h --
	 * TODO confirm against that header.
	 */
	static const uint64_t sha512_consts[] = {
		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
		SHA512_CONST_78, SHA512_CONST_79
	};
#endif	/* __sparc */


	/*
	 * LOAD_BIG_64 may perform whole 64-bit loads; copy misaligned
	 * input into the context's aligned scratch buffer first.
	 */
	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
		blk = (uint8_t *)ctx->buf_un.buf64;
	}

	/* rounds 0-15: schedule words come straight from the block */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_64(blk + 8 * 0);
	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_64(blk + 8 * 1);
	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_64(blk + 8 * 2);
	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_64(blk + 8 * 3);
	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_64(blk + 8 * 4);
	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_64(blk + 8 * 5);
	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_64(blk + 8 * 6);
	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_64(blk + 8 * 7);
	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_64(blk + 8 * 8);
	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_64(blk + 8 * 9);
	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_64(blk + 8 * 10);
	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_64(blk + 8 * 11);
	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_64(blk + 8 * 12);
	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_64(blk + 8 * 13);
	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_64(blk + 8 * 14);
	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_64(blk + 8 * 15);
	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/*
	 * rounds 16-79: expand the schedule in place,
	 * w[t] = SIGMA1(w[t-2]) + w[t-7] + SIGMA0(w[t-15]) + w[t-16],
	 * keeping only the most recent 16 words (indices mod 16).
	 */
	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);

	/* add the compressed block back into the running hash state */
	ctx->state.s64[0] += a;
	ctx->state.s64[1] += b;
	ctx->state.s64[2] += c;
	ctx->state.s64[3] += d;
	ctx->state.s64[4] += e;
	ctx->state.s64[5] += f;
	ctx->state.s64[6] += g;
	ctx->state.s64[7] += h;

}
608 #endif	/* !__amd64 */
609 
610 
611 /*
612  * Encode()
613  *
614  * purpose: to convert a list of numbers from little endian to big endian
615  *   input: uint8_t *	: place to store the converted big endian numbers
616  *	    uint32_t *	: place to get numbers to convert from
617  *          size_t	: the length of the input in bytes
618  *  output: void
619  */
620 
621 static void
622 Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
623     size_t len)
624 {
625 	size_t		i, j;
626 
627 #if	defined(__sparc)
628 	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
629 		for (i = 0, j = 0; j < len; i++, j += 4) {
630 			/* LINTED E_BAD_PTR_CAST_ALIGN */
631 			*((uint32_t *)(output + j)) = input[i];
632 		}
633 	} else {
634 #endif	/* little endian -- will work on big endian, but slowly */
635 		for (i = 0, j = 0; j < len; i++, j += 4) {
636 			output[j]	= (input[i] >> 24) & 0xff;
637 			output[j + 1]	= (input[i] >> 16) & 0xff;
638 			output[j + 2]	= (input[i] >>  8) & 0xff;
639 			output[j + 3]	= input[i] & 0xff;
640 		}
641 #if	defined(__sparc)
642 	}
643 #endif
644 }
645 
646 static void
647 Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
648     size_t len)
649 {
650 	size_t		i, j;
651 
652 #if	defined(__sparc)
653 	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
654 		for (i = 0, j = 0; j < len; i++, j += 8) {
655 			/* LINTED E_BAD_PTR_CAST_ALIGN */
656 			*((uint64_t *)(output + j)) = input[i];
657 		}
658 	} else {
659 #endif	/* little endian -- will work on big endian, but slowly */
660 		for (i = 0, j = 0; j < len; i++, j += 8) {
661 
662 			output[j]	= (input[i] >> 56) & 0xff;
663 			output[j + 1]	= (input[i] >> 48) & 0xff;
664 			output[j + 2]	= (input[i] >> 40) & 0xff;
665 			output[j + 3]	= (input[i] >> 32) & 0xff;
666 			output[j + 4]	= (input[i] >> 24) & 0xff;
667 			output[j + 5]	= (input[i] >> 16) & 0xff;
668 			output[j + 6]	= (input[i] >>  8) & 0xff;
669 			output[j + 7]	= input[i] & 0xff;
670 		}
671 #if	defined(__sparc)
672 	}
673 #endif
674 }
675 
676 
/*
 * SHA2Init()
 *
 * purpose: initializes a SHA2 context for the given mechanism, loading
 *          the FIPS 180-2 initial hash values and zeroing the bit count.
 *   input: uint64_t	: mechanism type (SHA256/384/512, plain or HMAC)
 *          SHA2_CTX *	: the context to initialize
 *  output: void
 */
void
SHA2Init(uint64_t mech, SHA2_CTX *ctx)
{

	switch (mech) {
	case SHA256_MECH_INFO_TYPE:
	case SHA256_HMAC_MECH_INFO_TYPE:
	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
		/* SHA-256 initial hash value H(0) (FIPS 180-2 sec. 5.3.2) */
		ctx->state.s32[0] = 0x6a09e667U;
		ctx->state.s32[1] = 0xbb67ae85U;
		ctx->state.s32[2] = 0x3c6ef372U;
		ctx->state.s32[3] = 0xa54ff53aU;
		ctx->state.s32[4] = 0x510e527fU;
		ctx->state.s32[5] = 0x9b05688cU;
		ctx->state.s32[6] = 0x1f83d9abU;
		ctx->state.s32[7] = 0x5be0cd19U;
		break;
	case SHA384_MECH_INFO_TYPE:
	case SHA384_HMAC_MECH_INFO_TYPE:
	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
		/* SHA-384 initial hash value H(0) (FIPS 180-2 sec. 5.3.3) */
		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
		ctx->state.s64[1] = 0x629a292a367cd507ULL;
		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
		break;
	case SHA512_MECH_INFO_TYPE:
	case SHA512_HMAC_MECH_INFO_TYPE:
	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
		/* SHA-512 initial hash value H(0) (FIPS 180-2 sec. 5.3.4) */
		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
		ctx->state.s64[4] = 0x510e527fade682d1ULL;
		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
		break;
#ifdef _KERNEL
	default:
		/* unknown mechanism is a programming error: panic */
		cmn_err(CE_PANIC,
		    "sha2_init: failed to find a supported algorithm: 0x%x",
		    (uint32_t)mech);

#endif /* _KERNEL */
	}
	/*
	 * NOTE(review): in userland builds an unrecognized mech falls
	 * through silently, leaving ctx->state uninitialized.
	 */

	ctx->algotype = (uint32_t)mech;
	ctx->count.c64[0] = ctx->count.c64[1] = 0;
}
730 
731 #ifndef _KERNEL
732 
733 #pragma inline(SHA256Init, SHA384Init, SHA512Init)
/* Convenience wrapper: initialize ctx for SHA-256 digesting. */
void
SHA256Init(SHA256_CTX *ctx)
{
	SHA2Init(SHA256, ctx);
}
739 
/* Convenience wrapper: initialize ctx for SHA-384 digesting. */
void
SHA384Init(SHA384_CTX *ctx)
{
	SHA2Init(SHA384, ctx);
}
745 
/* Convenience wrapper: initialize ctx for SHA-512 digesting. */
void
SHA512Init(SHA512_CTX *ctx)
{
	SHA2Init(SHA512, ctx);
}
751 
752 #endif /* _KERNEL */
753 
754 /*
755  * SHA2Update()
756  *
757  * purpose: continues an sha2 digest operation, using the message block
758  *          to update the context.
759  *   input: SHA2_CTX *	: the context to update
760  *          void *	: the message block
761  *          size_t      : the length of the message block, in bytes
762  *  output: void
763  */
764 
765 void
766 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
767 {
768 	uint32_t	i, buf_index, buf_len, buf_limit;
769 	const uint8_t	*input = inptr;
770 	uint32_t	algotype = ctx->algotype;
771 #if defined(__amd64)
772 	uint32_t	block_count;
773 #endif	/* !__amd64 */
774 
775 
776 	/* check for noop */
777 	if (input_len == 0)
778 		return;
779 
780 	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
781 		buf_limit = 64;
782 
783 		/* compute number of bytes mod 64 */
784 		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
785 
786 		/* update number of bits */
787 		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
788 			ctx->count.c32[0]++;
789 
790 		ctx->count.c32[0] += (input_len >> 29);
791 
792 	} else {
793 		buf_limit = 128;
794 
795 		/* compute number of bytes mod 128 */
796 		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
797 
798 		/* update number of bits */
799 		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
800 			ctx->count.c64[0]++;
801 
802 		ctx->count.c64[0] += (input_len >> 29);
803 	}
804 
805 	buf_len = buf_limit - buf_index;
806 
807 	/* transform as many times as possible */
808 	i = 0;
809 	if (input_len >= buf_len) {
810 
811 		/*
812 		 * general optimization:
813 		 *
814 		 * only do initial bcopy() and SHA2Transform() if
815 		 * buf_index != 0.  if buf_index == 0, we're just
816 		 * wasting our time doing the bcopy() since there
817 		 * wasn't any data left over from a previous call to
818 		 * SHA2Update().
819 		 */
820 		if (buf_index) {
821 			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
822 			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
823 				SHA256Transform(ctx, ctx->buf_un.buf8);
824 			else
825 				SHA512Transform(ctx, ctx->buf_un.buf8);
826 
827 			i = buf_len;
828 		}
829 
830 #if !defined(__amd64)
831 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
832 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
833 				SHA256Transform(ctx, &input[i]);
834 			}
835 		} else {
836 			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
837 				SHA512Transform(ctx, &input[i]);
838 			}
839 		}
840 
841 #else
842 		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
843 			block_count = (input_len - i) >> 6;
844 			if (block_count > 0) {
845 				SHA256TransformBlocks(ctx, &input[i],
846 				    block_count);
847 				i += block_count << 6;
848 			}
849 		} else {
850 			block_count = (input_len - i) >> 7;
851 			if (block_count > 0) {
852 				SHA512TransformBlocks(ctx, &input[i],
853 				    block_count);
854 				i += block_count << 7;
855 			}
856 		}
857 #endif	/* !__amd64 */
858 
859 		/*
860 		 * general optimization:
861 		 *
862 		 * if i and input_len are the same, return now instead
863 		 * of calling bcopy(), since the bcopy() in this case
864 		 * will be an expensive noop.
865 		 */
866 
867 		if (input_len == i)
868 			return;
869 
870 		buf_index = 0;
871 	}
872 
873 	/* buffer remaining input */
874 	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
875 }
876 
877 
878 /*
879  * SHA2Final()
880  *
881  * purpose: ends an sha2 digest operation, finalizing the message digest and
882  *          zeroing the context.
883  *   input: uchar_t *	: a buffer to store the digest
884  *			: The function actually uses void* because many
885  *			: callers pass things other than uchar_t here.
886  *          SHA2_CTX *  : the context to finalize, save, and zero
887  *  output: void
888  */
889 
void
SHA2Final(void *digest, SHA2_CTX *ctx)
{
	/* big-endian images of the 64-bit (SHA-256) / 128-bit bit count */
	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
	uint32_t	index;
	uint32_t	algotype = ctx->algotype;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		/* bytes buffered so far, mod the 64-byte block size */
		index  = (ctx->count.c32[1] >> 3) & 0x3f;
		/* snapshot the count before padding updates it */
		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
		/*
		 * Pad to 56 mod 64, leaving 8 bytes for the length; if
		 * index >= 56 the pad spills into a second block
		 * (120 = 64 + 56).
		 */
		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));

	} else {
		/* bytes buffered so far, mod the 128-byte block size */
		index  = (ctx->count.c64[1] >> 3) & 0x7f;
		Encode64(bitcount_be64, ctx->count.c64,
		    sizeof (bitcount_be64));
		/*
		 * Pad to 112 mod 128, leaving 16 bytes for the length
		 * (240 = 128 + 112 when the pad spills over).
		 */
		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
			/*
			 * SHA-384 truncates to the first six words; the
			 * unused words are scrubbed before emitting the
			 * 48-byte digest.
			 */
			ctx->state.s64[6] = ctx->state.s64[7] = 0;
			Encode64(digest, ctx->state.s64,
			    sizeof (uint64_t) * 6);
		} else
			Encode64(digest, ctx->state.s64,
			    sizeof (ctx->state.s64));
	}

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}
923