/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The basic framework for this code came from the reference
 * implementation for MD5.  That implementation is Copyright (C)
 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 *
 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
 * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
 * Not as fast as one would like -- further optimizations are encouraged
 * and appreciated.
 */

#ifndef _KERNEL
#include <stdint.h>
#include <strings.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/systeminfo.h>
#endif /* !_KERNEL */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/sha1.h>
#include <sys/sha1_consts.h>

#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define	HAVE_HTONL
#endif

static void Encode(uint8_t *, const uint32_t *, size_t);

#if defined(__sparc)

#define	SHA1_TRANSFORM(ctx, in) \
	SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
	    (ctx)->state[3], (ctx)->state[4], (ctx), (in))

static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
    SHA1_CTX *, const uint8_t *);

#elif defined(__amd64)

#define	SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
#define	SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
	(in), (num))

void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);

#else

#define	SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))

static void SHA1Transform(SHA1_CTX *, const uint8_t *);

#endif


static uint8_t PADDING[64] = { 0x80, /* all zeros */ };

/*
 * F, G, and H are the basic SHA1 functions.
 */
#define	F(b, c, d)	(((b) & (c)) | ((~b) & (d)))
#define	G(b, c, d)	((b) ^ (c) ^ (d))
#define	H(b, c, d)	(((b) & (c)) | (((b)|(c)) & (d)))

/*
 * ROTATE_LEFT rotates x left n bits.
 */

#if defined(__GNUC__) && defined(_LP64)
static __inline__ uint64_t
ROTATE_LEFT(uint64_t value, uint32_t n)
{
	uint32_t t32;

	t32 = (uint32_t)value;
	return ((t32 << n) | (t32 >> (32 - n)));
}

#else

#define	ROTATE_LEFT(x, n)	\
	(((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))

#endif
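
/*
 * For reference, both forms of ROTATE_LEFT() compute a plain 32-bit left
 * rotate, e.g.
 *
 *	ROTATE_LEFT(0x12345678, 8) == 0x34567812
 *	ROTATE_LEFT(0x80000000, 1) == 0x00000001
 *
 * The inline-function form used on LP64 GNU C builds takes a uint64_t
 * argument but truncates it to 32 bits before rotating, so the result is
 * the same 32-bit rotate.
 */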

/*
 * SHA1Init()
 *
 * purpose: initializes the sha1 context and begins an sha1 digest operation
 * input: SHA1_CTX *	: the context to initialize.
 * output: void
 */

void
SHA1Init(SHA1_CTX *ctx)
{
	ctx->count[0] = ctx->count[1] = 0;

	/*
	 * load magic initialization constants. Tell lint
	 * that these constants are unsigned by using U.
	 */

	ctx->state[0] = 0x67452301U;
	ctx->state[1] = 0xefcdab89U;
	ctx->state[2] = 0x98badcfeU;
	ctx->state[3] = 0x10325476U;
	ctx->state[4] = 0xc3d2e1f0U;
}
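
/*
 * Editor's example (not part of the original Sun sources): a minimal sketch
 * of the intended calling sequence for this API.  The guard macro
 * SHA1_USAGE_EXAMPLE and the function name are hypothetical and are never
 * defined or referenced by the build; the block only illustrates how
 * SHA1Init(), SHA1Update(), and SHA1Final() fit together and that the
 * resulting digest is 20 bytes (160 bits).
 */
#ifdef SHA1_USAGE_EXAMPLE
static void
sha1_usage_example(const void *msg, size_t msglen, uint8_t digest[20])
{
	SHA1_CTX ctx;

	SHA1Init(&ctx);			/* load the initial state H0..H4 */
	SHA1Update(&ctx, msg, msglen);	/* may be called any number of times */
	SHA1Final(digest, &ctx);	/* pads, appends length, zeroes ctx */
}
#endif	/* SHA1_USAGE_EXAMPLE */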

#ifdef VIS_SHA1
#ifdef _KERNEL

#include <sys/regset.h>
#include <sys/vis.h>
#include <sys/fpu/fpusystm.h>

/* the alignment for block stores to save fp registers */
#define	VIS_ALIGN	(64)

extern int sha1_savefp(kfpu_t *, int);
extern void sha1_restorefp(kfpu_t *);

uint32_t	vis_sha1_svfp_threshold = 128;

#endif /* _KERNEL */

/*
 * VIS SHA-1 consts.
 */
static uint64_t VIS[] = {
	0x8000000080000000ULL,
	0x0002000200020002ULL,
	0x5a8279996ed9eba1ULL,
	0x8f1bbcdcca62c1d6ULL,
	0x012389ab456789abULL};

extern void SHA1TransformVIS(uint64_t *, uint32_t *, uint32_t *, uint64_t *);


/*
 * SHA1Update()
 *
 * purpose: continues an sha1 digest operation, using the message block
 *          to update the context.
 * input: SHA1_CTX *	: the context to update
 *        void *	: the message block
 *        size_t	: the length of the message block in bytes
 * output: void
 */

void
SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
{
	uint32_t i, buf_index, buf_len;
	uint64_t X0[40], input64[8];
	const uint8_t *input = inptr;
#ifdef _KERNEL
	int usevis = 0;
#else
	int usevis = 1;
#endif /* _KERNEL */

	/* check for noop */
	if (input_len == 0)
		return;

	/* compute number of bytes mod 64 */
	buf_index = (ctx->count[1] >> 3) & 0x3F;

	/* update number of bits */
	if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
		ctx->count[0]++;

	ctx->count[0] += (input_len >> 29);

	buf_len = 64 - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {
#ifdef _KERNEL
		kfpu_t *fpu;
		if (fpu_exists) {
			uint8_t fpua[sizeof (kfpu_t) + GSR_SIZE + VIS_ALIGN];
			uint32_t len = (input_len + buf_index) & ~0x3f;
			int svfp_ok;

			fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, 64);
			svfp_ok = ((len >= vis_sha1_svfp_threshold) ? 1 : 0);
			usevis = fpu_exists && sha1_savefp(fpu, svfp_ok);
		} else {
			usevis = 0;
		}
#endif /* _KERNEL */

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA1Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA1Update().
		 */

		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			if (usevis) {
				SHA1TransformVIS(X0,
				    ctx->buf_un.buf32,
				    &ctx->state[0], VIS);
			} else {
				SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
			}
			i = buf_len;
		}

		/*
		 * VIS SHA-1: uses the VIS 1.0 instructions to accelerate
		 * SHA-1 processing.  This is achieved by "offloading" the
		 * computation of the message schedule (MS) to the VIS units.
		 * This allows the VIS computation of the message schedule
		 * to be performed in parallel with the standard integer
		 * processing of the remainder of the SHA-1 computation,
		 * improving performance by up to around 1.37X compared to
		 * an optimized integer-only implementation.
		 *
		 * The VIS implementation of SHA1Transform has a different API
		 * to the standard integer version:
		 *
		 * void SHA1TransformVIS(
		 *	uint64_t *, // Pointer to MS for ith block
		 *	uint32_t *, // Pointer to ith block of message data
		 *	uint32_t *, // Pointer to SHA state i.e ctx->state
		 *	uint64_t *, // Pointer to various VIS constants
		 * )
		 *
		 * Note: the message data must be 4-byte aligned.
		 *
		 * Function requires VIS 1.0 support.
		 *
		 * Handling is provided to deal with arbitrary byte alignment
		 * of the input data but the performance gains are reduced
		 * for alignments other than 4-bytes.
		 */
		if (usevis) {
			if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
				/*
				 * Main processing loop - input misaligned
				 */
				for (; i + 63 < input_len; i += 64) {
					bcopy(&input[i], input64, 64);
					SHA1TransformVIS(X0,
					    (uint32_t *)input64,
					    &ctx->state[0], VIS);
				}
			} else {
				/*
				 * Main processing loop - input 4-byte aligned
				 */
				for (; i + 63 < input_len; i += 64) {
					SHA1TransformVIS(X0,
					    /* LINTED E_BAD_PTR_CAST_ALIGN */
					    (uint32_t *)&input[i], /* CSTYLED */
					    &ctx->state[0], VIS);
				}

			}
#ifdef _KERNEL
			sha1_restorefp(fpu);
#endif /* _KERNEL */
		} else {
			for (; i + 63 < input_len; i += 64) {
				SHA1_TRANSFORM(ctx, &input[i]);
			}
		}

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive nop.
		 */

		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}
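
/*
 * Editor's note (not part of the original Sun sources): both SHA1Update()
 * variants keep the running message length *in bits* as a 64-bit quantity
 * split across two 32-bit words, with count[1] holding the low word and
 * count[0] the high word.  A sketch of the same update using a single
 * 64-bit counter, for reference only:
 *
 *	uint64_t bits = ((uint64_t)ctx->count[0] << 32) | ctx->count[1];
 *	bits += (uint64_t)input_len << 3;
 *	ctx->count[0] = (uint32_t)(bits >> 32);
 *	ctx->count[1] = (uint32_t)bits;
 *
 * The carry test on count[1] and the separate `input_len >> 29' addition
 * to count[0] in the code above supply the low and high halves of that
 * update without needing 64-bit arithmetic.
 */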

#else /* VIS_SHA1 */

void
SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
{
	uint32_t i, buf_index, buf_len;
	const uint8_t *input = inptr;
#if defined(__amd64)
	uint32_t block_count;
#endif /* __amd64 */

	/* check for noop */
	if (input_len == 0)
		return;

	/* compute number of bytes mod 64 */
	buf_index = (ctx->count[1] >> 3) & 0x3F;

	/* update number of bits */
	if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
		ctx->count[0]++;

	ctx->count[0] += (input_len >> 29);

	buf_len = 64 - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA1Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA1Update().
		 */

		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
			i = buf_len;
		}

#if !defined(__amd64)
		for (; i + 63 < input_len; i += 64)
			SHA1_TRANSFORM(ctx, &input[i]);
#else
		block_count = (input_len - i) >> 6;
		if (block_count > 0) {
			SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
			i += block_count << 6;
		}
#endif /* !__amd64 */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive nop.
		 */

		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}

#endif /* VIS_SHA1 */

/*
 * SHA1Final()
 *
 * purpose: ends an sha1 digest operation, finalizing the message digest and
 *          zeroing the context.
 * input: uchar_t *	: A buffer to store the digest.
 *			: The function actually uses void* because many
 *			: callers pass things other than uchar_t here.
 *        SHA1_CTX *	: the context to finalize, save, and zero
 * output: void
 */

void
SHA1Final(void *digest, SHA1_CTX *ctx)
{
	uint8_t bitcount_be[sizeof (ctx->count)];
	uint32_t index = (ctx->count[1] >> 3) & 0x3f;

	/* store bit count, big endian */
	Encode(bitcount_be, ctx->count, sizeof (bitcount_be));

	/* pad out to 56 mod 64 */
	SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);

	/* append length (before padding) */
	SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));

	/* store state in digest */
	Encode(digest, ctx->state, sizeof (ctx->state));

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}
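
/*
 * Editor's note (not part of the original Sun sources): the pad length used
 * in SHA1Final() is chosen so that, once the pad bytes and the 8-byte bit
 * count are appended, the total message length is a multiple of 64 bytes.
 * Two worked examples, where `index' is the number of bytes already
 * buffered mod 64:
 *
 *	index = 10:  pad = 56 - 10  = 46 bytes	(10 + 46 + 8 = 64)
 *	index = 60:  pad = 120 - 60 = 60 bytes	(60 + 60 + 8 = 128)
 *
 * PADDING[] supplies the mandatory leading 0x80 byte followed by zeros.
 */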


#if !defined(__amd64)

typedef uint32_t sha1word;

/*
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily.  note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 */

#if defined(_BIG_ENDIAN)
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))

#elif defined(HAVE_HTONL)
#define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))

#else
/* little endian -- will work on big endian, but slowly */
#define	LOAD_BIG_32(addr)	\
	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
#endif /* _BIG_ENDIAN */
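
/*
 * Editor's note (not part of the original Sun sources): whichever definition
 * is selected, LOAD_BIG_32() reads four consecutive bytes as a single
 * big-endian 32-bit word; for the bytes { 0x01, 0x02, 0x03, 0x04 } it
 * yields 0x01020304.  The first two variants do a single (possibly
 * byte-swapped) 32-bit load and therefore need a 4-byte aligned address;
 * the portable fallback assembles the word from individual bytes and works
 * at any alignment, just more slowly.
 */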

/*
 * SHA1Transform()
 */
#if defined(W_ARRAY)
#define	W(n) w[n]
#else /* !defined(W_ARRAY) */
#define	W(n) w_ ## n
#endif /* !defined(W_ARRAY) */
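
/*
 * Editor's note (not part of the original Sun sources): with W_ARRAY
 * defined, W(3) expands to the array element w[3]; without it, token
 * pasting turns W(3) into the individual variable w_3.  The per-variable
 * form lets the compiler keep the entire 16-word message schedule in
 * registers on machines that have enough of them.
 */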

#if defined(__sparc)

/*
 * sparc register window optimization:
 *
 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 * explicitly since it increases the number of registers available to
 * the compiler.  under this scheme, these variables can be held in
 * %i0 - %i4, which leaves more local and out registers available.
 *
 * purpose: sha1 transformation -- updates the digest based on `block'
 * input: uint32_t	: bytes  1 -  4 of the digest
 *        uint32_t	: bytes  5 -  8 of the digest
 *        uint32_t	: bytes  9 - 12 of the digest
 *        uint32_t	: bytes 13 - 16 of the digest
 *        uint32_t	: bytes 17 - 20 of the digest
 *        SHA1_CTX *	: the context to update
 *        uint8_t [64]	: the block to use to update the digest
 * output: void
 */

void
SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
    SHA1_CTX *ctx, const uint8_t blk[64])
{
	/*
	 * sparc optimization:
	 *
	 * while it is somewhat counter-intuitive, on sparc, it is
	 * more efficient to place all the constants used in this
	 * function in an array and load the values out of the array
	 * than to manually load the constants.  this is because
	 * setting a register to a 32-bit value takes two ops in most
	 * cases: a `sethi' and an `or', but loading a 32-bit value
	 * from memory only takes one `ld' (or `lduw' on v9).  while
	 * this increases memory usage, the compiler can find enough
	 * other things to do while waiting so that the pipeline does
	 * not stall.  additionally, it is likely that many of these
	 * constants are cached so that later accesses do not even go
	 * out to the bus.
	 *
	 * this array is declared `static' to keep the compiler from
	 * having to bcopy() this array onto the stack frame of
	 * SHA1Transform() each time it is called -- which is
	 * unacceptably expensive.
	 *
	 * the `const' is to ensure that callers are good citizens and
	 * do not try to munge the array.  since these routines are
	 * going to be called from inside multithreaded kernelland,
	 * this is a good safety check. -- `sha1_consts' will end up in
	 * .rodata.
	 *
	 * unfortunately, loading from an array in this manner hurts
	 * performance under Intel.  So, there is a macro,
	 * SHA1_CONST(), used in SHA1Transform(), that either expands to
	 * a reference to this array, or to the actual constant,
	 * depending on what platform this code is compiled for.
	 */

	static const uint32_t sha1_consts[] = {
		SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
	};

	/*
	 * general optimization:
	 *
	 * use individual integers instead of using an array.  this is a
	 * win, although the amount it wins by seems to vary quite a bit.
	 */

	uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
	uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;

	/*
	 * sparc optimization:
	 *
	 * if `block' is already aligned on a 4-byte boundary, use
	 * LOAD_BIG_32() directly.  otherwise, bcopy() into a
	 * buffer that *is* aligned on a 4-byte boundary and then do
	 * the LOAD_BIG_32() on that buffer.  benchmarks have shown
	 * that using the bcopy() is better than loading the bytes
	 * individually and doing the endian-swap by hand.
	 *
	 * even though it's quite tempting to do the bcopy() unconditionally
	 * and always load from ctx->buf_un.buf32, so that there is only one
	 * set of LOAD_BIG_32()'s, the compiler *does not* like that, so
	 * please resist the urge.
	 */

	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
		w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
		w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
		w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
		w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
		w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
		w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
		w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
		w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
		w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
		w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
		w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
		w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
		w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
		w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
		w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
		w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
	} else {
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_15 = LOAD_BIG_32(blk + 60);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_14 = LOAD_BIG_32(blk + 56);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_13 = LOAD_BIG_32(blk + 52);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_12 = LOAD_BIG_32(blk + 48);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_11 = LOAD_BIG_32(blk + 44);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_10 = LOAD_BIG_32(blk + 40);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_9 = LOAD_BIG_32(blk + 36);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_8 = LOAD_BIG_32(blk + 32);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_7 = LOAD_BIG_32(blk + 28);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_6 = LOAD_BIG_32(blk + 24);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_5 = LOAD_BIG_32(blk + 20);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_4 = LOAD_BIG_32(blk + 16);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_3 = LOAD_BIG_32(blk + 12);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_2 = LOAD_BIG_32(blk + 8);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_1 = LOAD_BIG_32(blk + 4);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_0 = LOAD_BIG_32(blk + 0);
	}
#else /* !defined(__sparc) */

void /* CSTYLED */
SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
{
	/* CSTYLED */
	sha1word a = ctx->state[0];
	sha1word b = ctx->state[1];
	sha1word c = ctx->state[2];
	sha1word d = ctx->state[3];
	sha1word e = ctx->state[4];

#if defined(W_ARRAY)
	sha1word w[16];
#else /* !defined(W_ARRAY) */
	sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
	sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
#endif /* !defined(W_ARRAY) */

	W(0) = LOAD_BIG_32((void *)(blk + 0));
	W(1) = LOAD_BIG_32((void *)(blk + 4));
	W(2) = LOAD_BIG_32((void *)(blk + 8));
	W(3) = LOAD_BIG_32((void *)(blk + 12));
	W(4) = LOAD_BIG_32((void *)(blk + 16));
	W(5) = LOAD_BIG_32((void *)(blk + 20));
	W(6) = LOAD_BIG_32((void *)(blk + 24));
	W(7) = LOAD_BIG_32((void *)(blk + 28));
	W(8) = LOAD_BIG_32((void *)(blk + 32));
	W(9) = LOAD_BIG_32((void *)(blk + 36));
	W(10) = LOAD_BIG_32((void *)(blk + 40));
	W(11) = LOAD_BIG_32((void *)(blk + 44));
	W(12) = LOAD_BIG_32((void *)(blk + 48));
	W(13) = LOAD_BIG_32((void *)(blk + 52));
	W(14) = LOAD_BIG_32((void *)(blk + 56));
	W(15) = LOAD_BIG_32((void *)(blk + 60));

#endif /* !defined(__sparc) */

	/*
	 * general optimization:
	 *
	 * even though this approach is described in the standard as
	 * being slower algorithmically, it is 30-40% faster than the
	 * "faster" version under SPARC, because this version has more
	 * of the constraints specified at compile-time and uses fewer
	 * variables (and therefore has better register utilization)
	 * than its "speedier" brother.  (i've tried both, trust me)
	 *
	 * for either method given in the spec, there is an "assignment"
	 * phase where the following takes place:
	 *
	 *	tmp = (main_computation);
	 *	e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
	 *
	 * we can make the algorithm go faster by not doing this work,
	 * but just pretending that `d' is now `e', etc.  this works
	 * really well and obviates the need for a temporary variable.
	 * however, we still explicitly perform the rotate action,
	 * since it is cheaper on SPARC to do it once than to have to
	 * do it over and over again.
	 */
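
	/*
	 * Editor's note (not part of the original Sun sources): the fully
	 * unrolled rounds below compute the same thing as the compact loop
	 * given in FIPS 180-1, which reads roughly:
	 *
	 *	for (t = 0; t < 80; t++) {
	 *		if (t >= 16)
	 *			W(t % 16) = ROTATE_LEFT(W((t + 13) % 16) ^
	 *			    W((t + 8) % 16) ^ W((t + 2) % 16) ^
	 *			    W(t % 16), 1);
	 *		tmp = ROTATE_LEFT(a, 5) + f(t, b, c, d) + e +
	 *		    W(t % 16) + K(t);
	 *		e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
	 *	}
	 *
	 * where f(t) and K(t) are shorthand (not macros defined in this
	 * file): f(t) is F, G, H, or G and K(t) is SHA1_CONST(0) through
	 * SHA1_CONST(3) for each successive group of 20 rounds.  The
	 * unrolled code renames the five working variables each round
	 * instead of shuffling them, and recycles the 16 schedule words
	 * in place, which is why the W(n) indices repeat mod 16.
	 */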

	/* round 1 */
	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
	b = ROTATE_LEFT(b, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 16 */
	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
	a = ROTATE_LEFT(a, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 17 */
	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
	e = ROTATE_LEFT(e, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 18 */
	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
	d = ROTATE_LEFT(d, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 19 */
	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
	c = ROTATE_LEFT(c, 30);
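
	/*
	 * rounds 20 through 39 below follow the same pattern as rounds
	 * 0 through 19 above, but use the parity function G() and the
	 * round constant SHA1_CONST(1).
	 */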

	/* round 2 */
	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 20 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 21 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 22 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 23 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 24 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 25 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 26 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 27 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 28 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 29 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 30 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 31 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 32 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 33 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 34 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 35 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 36 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 37 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 38 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 39 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);
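
	/*
	 * rounds 40 through 59 below switch to the majority function H()
	 * and the round constant SHA1_CONST(2).
	 */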

	/* round 3 */
	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 40 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 41 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 42 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 43 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 44 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 45 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 46 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 47 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 48 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 49 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 50 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 51 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 52 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 53 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 54 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 55 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 56 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 57 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 58 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 59 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);
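
	/*
	 * rounds 60 through 79 below reuse the parity function G(), this
	 * time with SHA1_CONST(3).  the very last round is folded directly
	 * into the ctx->state[] update at the end of the function.
	 */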

	/* round 4 */
	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 60 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 61 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 62 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 63 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 64 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 65 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 66 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 67 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 68 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 69 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 70 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 71 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 72 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 73 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 74 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 75 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 76 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 77 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 78 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 79 */

	ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
	    SHA1_CONST(3);
	ctx->state[1] += b;
	ctx->state[2] += ROTATE_LEFT(c, 30);
	ctx->state[3] += d;
	ctx->state[4] += e;

	/* zeroize sensitive information */
	W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
	W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
}
#endif /* !__amd64 */


/*
 * Encode()
 *
 * purpose: to convert a list of 32-bit words into big endian (most
 *          significant byte first) order
 * input: uint8_t *	: place to store the converted big endian numbers
 *        uint32_t *	: place to get numbers to convert from
 *        size_t	: the length of the input in bytes
 * output: void
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t i, j;

#if defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
		for (i = 0, j = 0; j < len; i++, j += 4) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint32_t *)(output + j)) = input[i];
		}
	} else {
#endif /* little endian -- will work on big endian, but slowly */
		for (i = 0, j = 0; j < len; i++, j += 4) {
			output[j] = (input[i] >> 24) & 0xff;
			output[j + 1] = (input[i] >> 16) & 0xff;
			output[j + 2] = (input[i] >> 8) & 0xff;
			output[j + 3] = input[i] & 0xff;
		}
#if defined(__sparc)
	}
#endif
}
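
/*
 * Editor's note (not part of the original Sun sources): Encode() stores
 * each 32-bit word most-significant byte first, as required for both the
 * SHA-1 length field and the final digest.  For example, the initial state
 * word 0x67452301 is written out as the byte sequence 0x67 0x45 0x23 0x01.
 */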