/*
 * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * SHA512 low level APIs are deprecated for public use, but still ok for
 * internal use.
 */
#include "internal/deprecated.h"

#include <stdio.h>
#include <openssl/opensslconf.h>
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits;
 * - have optimized versions implementing two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in
 *   input stream byte order;
 * - share common byte-order-neutral collector and padding function
 *   implementations, ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The reasons,
 * in reverse order, are:
 *
 * - it is the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation
 *   [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the maintenance burden in two ways: a) the
 *   collector/padding function is simpler; b) there is only one
 *   transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide so that a number
 *   of optimizations can be applied to mitigate potential performance
 *   penalties caused by the previous design decision.
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up with
 * a 128-bit long long, an adjustment to sha.h would be required.
 * As this implementation relies on a 64-bit integer type, it is totally
 * inappropriate for platforms which don't support one, most notably
 * 16-bit platforms.
 */
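
/*
 * A note on "input stream byte order": SHA-512 consumes its input as a
 * sequence of big-endian 64-bit words, so a byte-order-neutral load
 * collects bytes most-significant first. A minimal sketch of such a
 * load, where p is a const unsigned char * at the start of the word
 * (the portable PULL64 fallback further below is the macro equivalent):
 *
 *     SHA_LONG64 w = 0;
 *     for (int j = 0; j < 8; j++)
 *         w = (w << 8) | p[j];
 */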
#include <stdlib.h>
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>

#include "internal/cryptlib.h"
#include "crypto/sha.h"

/*
 * On these targets unaligned 64-bit loads are handled either by the
 * hardware or by the assembly implementation; everywhere else the
 * collector copies unaligned input into an aligned buffer first.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

/* U64 appends the compiler-appropriate suffix to a 64-bit unsigned constant */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C)     C##UI64
#elif defined(__arch64__)
# define U64(C)     C##UL
#else
# define U64(C)     C##ULL
#endif

int sha512_224_init(SHA512_CTX *c)
{
    c->h[0] = U64(0x8c3d37c819544da2);
    c->h[1] = U64(0x73e1996689dcd4d6);
    c->h[2] = U64(0x1dfab7ae32ff9c82);
    c->h[3] = U64(0x679dd514582f9fcf);
    c->h[4] = U64(0x0f6d2b697bd44da8);
    c->h[5] = U64(0x77e36f7304c48942);
    c->h[6] = U64(0x3f9d85a86a1d36c8);
    c->h[7] = U64(0x1112e6ad91d692a1);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;
    return 1;
}

int sha512_256_init(SHA512_CTX *c)
{
    c->h[0] = U64(0x22312194fc2bf72c);
    c->h[1] = U64(0x9f555fa3c84c64c2);
    c->h[2] = U64(0x2393b86b6f53b151);
    c->h[3] = U64(0x963877195940eabd);
    c->h[4] = U64(0x96283ee2a88effe3);
    c->h[5] = U64(0xbe5e1e2553863992);
    c->h[6] = U64(0x2b0199fc2c85b8aa);
    c->h[7] = U64(0x0eb72ddc81c52ca2);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;
    return 1;
}

int SHA384_Init(SHA512_CTX *c)
{
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
    c->h[1] = U64(0x629a292a367cd507);
    c->h[2] = U64(0x9159015a3070dd17);
    c->h[3] = U64(0x152fecd8f70e5939);
    c->h[4] = U64(0x67332667ffc00b31);
    c->h[5] = U64(0x8eb44a8768581511);
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
    c->h[7] = U64(0x47b5481dbefa4fa4);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}

int SHA512_Init(SHA512_CTX *c)
{
    c->h[0] = U64(0x6a09e667f3bcc908);
    c->h[1] = U64(0xbb67ae8584caa73b);
    c->h[2] = U64(0x3c6ef372fe94f82b);
    c->h[3] = U64(0xa54ff53a5f1d36f1);
    c->h[4] = U64(0x510e527fade682d1);
    c->h[5] = U64(0x9b05688c2b3e6c1f);
    c->h[6] = U64(0x1f83d9abfb41bd6b);
    c->h[7] = U64(0x5be0cd19137e2179);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}
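
/*
 * A minimal usage sketch for these low-level routines (deprecated for
 * public use, per the note at the top of this file; applications should
 * use the EVP interface instead). handle_error() is a hypothetical
 * placeholder:
 *
 *     SHA512_CTX ctx;
 *     unsigned char md[SHA512_DIGEST_LENGTH];
 *
 *     if (!SHA512_Init(&ctx)
 *             || !SHA512_Update(&ctx, "abc", 3)
 *             || !SHA512_Final(md, &ctx))
 *         handle_error();
 *
 * On success, md holds the 64-byte SHA-512 digest of "abc".
 */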

/*
 * Process |num| 128-byte blocks of big-endian input starting at |in|;
 * an assembly implementation is used when SHA512_ASM is defined.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);

int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    p[n] = 0x80;                /* There is always room for one byte */
    n++;
    if (n > (sizeof(c->u) - 16)) {
        /* No room left for the 128-bit length; pad and flush this block */
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    /* Zero-pad up to the 128-bit bit count at the end of the block */
    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Store the bit count big-endian; Nh holds its high 64 bits */
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
#endif

    sha512_block_data_order(c, p, 1);

    if (md == NULL)
        return 0;
    switch (c->md_len) {
    /* Let compiler decide if it's appropriate to unroll... */
    case SHA224_DIGEST_LENGTH:
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        /*
         * For 224 bits, there are four bytes left over that have to be
         * processed separately.
         */
        {
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
        }
        break;
    case SHA256_DIGEST_LENGTH:
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA384_DIGEST_LENGTH:
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA512_DIGEST_LENGTH:
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    /* ... as well as make sure md_len is not abused. */
    default:
        return 0;
    }

    return 1;
}

int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}

int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    /*
     * Nh:Nl is the 128-bit count of message bits hashed so far; len is
     * converted from bytes to bits, with a carry out of the low word
     * detected by the wrap-around test below.
     */
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61); /* bits shifted out of len << 3 */
    c->Nl = l;

    /* Fill up and flush any partial block buffered from a previous call */
    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        /* Unaligned input: bounce each block through the aligned buffer */
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                sha512_block_data_order(c, p, 1),
                len -= sizeof(c->u), data += sizeof(c->u);
        else
#endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
            data += len, len %= sizeof(c->u), data -= len;
    }

    /* Buffer the trailing partial block for the next call */
    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}

int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    return SHA512_Update(c, data, len);
}

void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    /* Copy unaligned input into the aligned context buffer first */
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
#endif
    sha512_block_data_order(c, data, 1);
}

#ifndef SHA512_ASM
/*
 * Round constants: the first 64 bits of the fractional parts of the cube
 * roots of the first eighty prime numbers (FIPS 180-4, section 4.2.3).
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};

# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rorq %1,%0"            \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x)  ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));   \
                           asm ("bswapq %0"             \
                                : "=r"(ret)             \
                                : "0"(ret)); ret;       })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1];            \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"   \
                               "roll $16,%%eax; roll $16,%%edx; "   \
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;"   \
                               : "=a"(lo),"=d"(hi)                  \
                               : "0"(lo),"1"(hi) : "cc");           \
                           ((SHA_LONG64)hi)<<32|lo;     })
#    else
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1];    \
                           asm ("bswapl %0; bswapl %1;"     \
                                : "=r"(lo),"=r"(hi)         \
                                : "0"(lo),"1"(hi));         \
                           ((SHA_LONG64)hi)<<32|lo;     })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rotrdi %0,%1,%2"       \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("ror %0,%1,%2"          \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)  ({ SHA_LONG64 ret;                      \
                           asm ("rev %0,%1"                     \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)          /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)   _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(I386_ONLY)
/* The 64-bit result is returned in EDX:EAX (MSVC's convention on x86) */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s) (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* The SHA-384/512 logical functions of FIPS 180-4, section 4.1.3 */
# define Sigma0(x)  (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)  (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)  (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)  (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on a 32-bit CPU with fewer than
 * ~24 registers, both size- and performance-wise...
 */

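/*
 * Instead of rotating the eight working variables a..h after every
 * round, the loop below keeps the state in memory and slides the
 * pointer F down one slot per round: F[0] and F[4] shadow the live
 * A and E values, F[1..3] and F[5..7] hold the rest of the state, and
 * F[8] onwards doubles as the 16-entry message schedule window. Only
 * A, E and the temporary T then compete for CPU registers.
 */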
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        for (i = 0; i < 16; i++, F--) {
# ifdef B_ENDIAN
            T = W[i];
# else
            T = PULL64(W[i]);
# endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}

# elif defined(OPENSSL_SMALL_FOOTPRINT)

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (i = 0; i < 16; i++) {
# ifdef B_ENDIAN
            T1 = X[i] = W[i];
# else
            T1 = X[i] = PULL64(W[i]);
# endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}

# else
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)

#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);       } while (0)
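
/*
 * The fully unrolled variant below never shuffles the working variables
 * at the end of a round; instead, each successive ROUND_* invocation
 * passes a..h rotated by one position, so the usual
 *
 *     h = g; g = f; ... b = a; a = T1 + T2;
 *
 * sequence is resolved at compile time.
 */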

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

# ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
# else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
# endif

        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}

# endif

#endif /* SHA512_ASM */