/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Cleaned-up and optimized version of MD5, based on the reference
 * implementation provided in RFC 1321.  See RSA Copyright information
 * below.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
 */

/*
 * Copyright (C) 1991-2, RSA Data Security, Inc.  Created 1991.  All
 * rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose.  It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 */

#include <sys/types.h>
#include <sys/md5.h>
#include <sys/md5_consts.h>	/* MD5_CONST() optimization */
#include "md5_byteswap.h"
#if !defined(_KERNEL) || defined(_BOOT)
#include <strings.h>
#endif /* !_KERNEL || _BOOT */

#ifdef _KERNEL
#include <sys/systm.h>
#endif /* _KERNEL */

static void Encode(uint8_t *, const uint32_t *, size_t);

#if !defined(__amd64)
static void MD5Transform(uint32_t, uint32_t, uint32_t, uint32_t, MD5_CTX *,
    const uint8_t [64]);
#else
void md5_block_asm_host_order(MD5_CTX *ctx, const void *inpp,
    unsigned int input_length_in_blocks);
#endif /* !defined(__amd64) */

static uint8_t PADDING[64] = { 0x80, /* all zeros */ };

/*
 * F, G, H and I are the basic MD5 functions.
 */
#define	F(b, c, d)	(((b) & (c)) | ((~(b)) & (d)))
#define	G(b, c, d)	(((b) & (d)) | ((c) & (~(d))))
#define	H(b, c, d)	((b) ^ (c) ^ (d))
#define	I(b, c, d)	((c) ^ ((b) | (~(d))))

/*
 * ROTATE_LEFT rotates x left n bits.
 */
#define	ROTATE_LEFT(x, n)	\
	(((x) << (n)) | ((x) >> ((sizeof (x) << 3) - (n))))

/*
 * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 * Rotation is separate from addition to prevent recomputation.
 */

#define	FF(a, b, c, d, x, s, ac) {	\
	(a) += F((b), (c), (d)) + (x) + ((unsigned long long)(ac));	\
	(a) = ROTATE_LEFT((a), (s));	\
	(a) += (b);	\
}

#define	GG(a, b, c, d, x, s, ac) {	\
	(a) += G((b), (c), (d)) + (x) + ((unsigned long long)(ac));	\
	(a) = ROTATE_LEFT((a), (s));	\
	(a) += (b);	\
}

#define	HH(a, b, c, d, x, s, ac) {	\
	(a) += H((b), (c), (d)) + (x) + ((unsigned long long)(ac));	\
	(a) = ROTATE_LEFT((a), (s));	\
	(a) += (b);	\
}

#define	II(a, b, c, d, x, s, ac) {	\
	(a) += I((b), (c), (d)) + (x) + ((unsigned long long)(ac));	\
	(a) = ROTATE_LEFT((a), (s));	\
	(a) += (b);	\
}
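/*
 * The four macros above are the entire per-step algorithm.  As an
 * illustrative sketch (not part of this file's build; MD5_EXAMPLE is a
 * hypothetical define), one round-1 step written out as a function:
 */
#ifdef MD5_EXAMPLE
static uint32_t
md5_ff_step(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
    uint32_t x, uint32_t s, uint32_t ac)
{
	a += F(b, c, d) + x + ac;	/* nonlinear mix + message word + constant */
	a = ROTATE_LEFT(a, s);		/* per-step left rotation */
	return (a + b);			/* feed b forward into the new a */
}
#endif /* MD5_EXAMPLE */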
/*
 * Loading 32-bit constants on a RISC is expensive since it involves both a
 * `sethi' and an `or'.  thus, we instead have the compiler generate `ld's to
 * load the constants from an array called `md5_consts'.  however, on intel
 * (and other CISC processors), it is cheaper to load the constant
 * directly.  thus, the c code in MD5Transform() uses the macro MD5_CONST()
 * which either expands to a constant or an array reference, depending on the
 * architecture the code is being compiled for.
 *
 * Right now, i386 and amd64 are the CISC exceptions.
 * If we get another CISC ISA, we'll have to change the ifdef.
 */

#if defined(__i386) || defined(__amd64)

#define	MD5_CONST(x)		(MD5_CONST_ ## x)
#define	MD5_CONST_e(x)		MD5_CONST(x)
#define	MD5_CONST_o(x)		MD5_CONST(x)

#else
/*
 * sparc/RISC optimization:
 *
 * while it is somewhat counter-intuitive, on sparc (and presumably other RISC
 * machines), it is more efficient to place all the constants used in this
 * function in an array and load the values out of the array than to manually
 * load the constants.  this is because setting a register to a 32-bit value
 * takes two ops in most cases: a `sethi' and an `or', but loading a 32-bit
 * value from memory only takes one `ld' (or `lduw' on v9).  while this
 * increases memory usage, the compiler can find enough other things to do
 * while waiting so that the pipeline does not stall.  additionally, it is
 * likely that many of these constants are cached so that later accesses do
 * not even go out to the bus.
 *
 * this array is declared `static' to keep the compiler from having to
 * bcopy() this array onto the stack frame of MD5Transform() each time it is
 * called -- which is unacceptably expensive.
 *
 * the `const' is to ensure that callers are good citizens and do not try to
 * munge the array.  since these routines are going to be called from inside
 * multithreaded kernelland, this is a good safety check. -- `constants' will
 * end up in .rodata.
 *
 * unfortunately, loading from an array in this manner hurts performance under
 * intel (and presumably other CISC machines).  so, there is a macro,
 * MD5_CONST(), used in MD5Transform(), that either expands to a reference to
 * this array, or to the actual constant, depending on what platform this code
 * is compiled for.
 */
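/*
 * For illustration (assuming MD5_CONST_0 carries the RFC 1321 value
 * 0xd76aa478), the same source text resolves differently per platform:
 *
 *	i386/amd64:  a += ... + MD5_CONST_e(0)  ->  a += ... + 0xd76aa478;
 *	sparc:       a += ... + MD5_CONST_e(0)  ->  a += ... + md5_consts[0];
 *
 * i.e., an immediate operand on CISC versus a single load from .rodata
 * on RISC.
 */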
#ifdef sun4v

/*
 * Going to load these consts in 8B chunks, so need to enforce 8B alignment
 */

/* CSTYLED */
#pragma align 64 (md5_consts)
#define	_MD5_CHECK_ALIGNMENT

#endif /* sun4v */

static const uint32_t md5_consts[] = {
	MD5_CONST_0,	MD5_CONST_1,	MD5_CONST_2,	MD5_CONST_3,
	MD5_CONST_4,	MD5_CONST_5,	MD5_CONST_6,	MD5_CONST_7,
	MD5_CONST_8,	MD5_CONST_9,	MD5_CONST_10,	MD5_CONST_11,
	MD5_CONST_12,	MD5_CONST_13,	MD5_CONST_14,	MD5_CONST_15,
	MD5_CONST_16,	MD5_CONST_17,	MD5_CONST_18,	MD5_CONST_19,
	MD5_CONST_20,	MD5_CONST_21,	MD5_CONST_22,	MD5_CONST_23,
	MD5_CONST_24,	MD5_CONST_25,	MD5_CONST_26,	MD5_CONST_27,
	MD5_CONST_28,	MD5_CONST_29,	MD5_CONST_30,	MD5_CONST_31,
	MD5_CONST_32,	MD5_CONST_33,	MD5_CONST_34,	MD5_CONST_35,
	MD5_CONST_36,	MD5_CONST_37,	MD5_CONST_38,	MD5_CONST_39,
	MD5_CONST_40,	MD5_CONST_41,	MD5_CONST_42,	MD5_CONST_43,
	MD5_CONST_44,	MD5_CONST_45,	MD5_CONST_46,	MD5_CONST_47,
	MD5_CONST_48,	MD5_CONST_49,	MD5_CONST_50,	MD5_CONST_51,
	MD5_CONST_52,	MD5_CONST_53,	MD5_CONST_54,	MD5_CONST_55,
	MD5_CONST_56,	MD5_CONST_57,	MD5_CONST_58,	MD5_CONST_59,
	MD5_CONST_60,	MD5_CONST_61,	MD5_CONST_62,	MD5_CONST_63
};

#ifdef sun4v
/*
 * To reduce the number of loads, load consts in 64-bit
 * chunks and then split.
 *
 * No need to mask upper 32-bits, as just interested in
 * low 32-bits (saves an & operation and means that this
 * optimization doesn't increase the icount).
 */
#define	MD5_CONST_e(x)		(md5_consts64[x/2] >> 32)
#define	MD5_CONST_o(x)		(md5_consts64[x/2])

#else

#define	MD5_CONST_e(x)		(md5_consts[x])
#define	MD5_CONST_o(x)		(md5_consts[x])

#endif /* sun4v */

#endif

/*
 * MD5Init()
 *
 * purpose: initializes the md5 context and begins an md5 digest operation
 *   input: MD5_CTX *	: the context to initialize.
 *  output: void
 */

void
MD5Init(MD5_CTX *ctx)
{
	ctx->count[0] = ctx->count[1] = 0;

	/* load magic initialization constants */
	ctx->state[0] = MD5_INIT_CONST_1;
	ctx->state[1] = MD5_INIT_CONST_2;
	ctx->state[2] = MD5_INIT_CONST_3;
	ctx->state[3] = MD5_INIT_CONST_4;
}
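/*
 * Usage sketch (illustrative only; MD5_EXAMPLE is a hypothetical define,
 * and the MD5Update()/MD5Final() prototypes are assumed to come from
 * <sys/md5.h> as usual): hashing a message in two pieces with the
 * streaming interface.  The split point is arbitrary -- the digest
 * matches a single MD5Update() over the whole buffer.
 */
#ifdef MD5_EXAMPLE
static void
md5_example_stream(uint8_t digest[16])
{
	MD5_CTX ctx;
	const char *msg = "abc";

	MD5Init(&ctx);
	MD5Update(&ctx, msg, 1);	/* "a" */
	MD5Update(&ctx, msg + 1, 2);	/* "bc" */
	MD5Final(digest, &ctx);		/* digest of "abc"; ctx is zeroed */
}
#endif /* MD5_EXAMPLE */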
/*
 * MD5Update()
 *
 * purpose: continues an md5 digest operation, using the message block
 *          to update the context.
 *   input: MD5_CTX *	: the context to update
 *          uint8_t *	: the message block
 *          uint32_t	: the length of the message block in bytes
 *  output: void
 *
 * MD5 crunches in 64-byte blocks.  All numeric constants here are related to
 * that property of MD5.
 */

void
MD5Update(MD5_CTX *ctx, const void *inpp, unsigned int input_len)
{
	uint32_t i, buf_index, buf_len;
#ifdef sun4v
	uint32_t old_asi;
#endif /* sun4v */
#if defined(__amd64)
	uint32_t block_count;
#endif /* defined(__amd64) */
	const unsigned char *input = (const unsigned char *)inpp;

	/* compute (number of bytes computed so far) mod 64 */
	buf_index = (ctx->count[0] >> 3) & 0x3F;

	/* update number of bits hashed into this MD5 computation so far */
	if ((ctx->count[0] += (input_len << 3)) < (input_len << 3))
		ctx->count[1]++;
	ctx->count[1] += (input_len >> 29);

	buf_len = 64 - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and MD5Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * MD5Update().
		 */

#ifdef sun4v
		/*
		 * For N1 use %asi register.  However, costly to repeatedly
		 * set in MD5Transform.  Therefore, set once here.
		 * Should probably restore the old value afterwards...
		 */
		old_asi = get_little();
		set_little(0x88);
#endif /* sun4v */

		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);

#if !defined(__amd64)
			MD5Transform(ctx->state[0], ctx->state[1],
			    ctx->state[2], ctx->state[3], ctx,
			    ctx->buf_un.buf8);
#else
			md5_block_asm_host_order(ctx, ctx->buf_un.buf8, 1);
#endif /* !defined(__amd64) */

			i = buf_len;
		}

#if !defined(__amd64)
		for (; i + 63 < input_len; i += 64)
			MD5Transform(ctx->state[0], ctx->state[1],
			    ctx->state[2], ctx->state[3], ctx, &input[i]);

#else
		block_count = (input_len - i) >> 6;
		if (block_count > 0) {
			md5_block_asm_host_order(ctx, &input[i], block_count);
			i += block_count << 6;
		}
#endif /* !defined(__amd64) */

#ifdef sun4v
		/*
		 * Restore old %ASI value
		 */
		set_little(old_asi);
#endif /* sun4v */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this
		 * case will be an expensive nop.
		 */

		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}
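/*
 * Worked example of the buffering above (illustrative): after
 * MD5Update(ctx, msg, 10) on a fresh context, no transform runs and 10
 * bytes sit in ctx->buf_un.buf8.  A following MD5Update() of 120 bytes
 * first fills the remaining buf_len = 54 bytes and transforms that
 * block, then transforms one more full 64-byte block straight from the
 * input, and finally buffers the trailing 2 bytes.
 */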
/*
 * MD5Final()
 *
 * purpose: ends an md5 digest operation, finalizing the message digest and
 *          zeroing the context.
 *   input: uchar_t *	: a buffer to store the digest in
 *			: The function actually uses void* because many
 *			: callers pass things other than uchar_t here.
 *          MD5_CTX *	: the context to finalize, save, and zero
 *  output: void
 */

void
MD5Final(void *digest, MD5_CTX *ctx)
{
	uint8_t bitcount_le[sizeof (ctx->count)];
	uint32_t index = (ctx->count[0] >> 3) & 0x3f;

	/* store bit count, little endian */
	Encode(bitcount_le, ctx->count, sizeof (bitcount_le));

	/* pad out to 56 mod 64 */
	MD5Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);

	/* append length (before padding) */
	MD5Update(ctx, bitcount_le, sizeof (bitcount_le));

	/* store state in digest */
	Encode(digest, ctx->state, sizeof (ctx->state));

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}

#ifndef _KERNEL

void
md5_calc(unsigned char *output, unsigned char *input, unsigned int inlen)
{
	MD5_CTX context;

	MD5Init(&context);
	MD5Update(&context, input, inlen);
	MD5Final(output, &context);
}
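/*
 * Illustrative self-check (not built by default; MD5_EXAMPLE is a
 * hypothetical define): md5_calc() against the RFC 1321 test vector
 * MD5("") = d41d8cd98f00b204e9800998ecf8427e.
 */
#ifdef MD5_EXAMPLE
static int
md5_example_check(void)
{
	static const unsigned char expected[16] = {
		0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
		0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e
	};
	unsigned char digest[16];

	md5_calc(digest, (unsigned char *)"", 0);
	return (bcmp(digest, expected, sizeof (digest)) == 0);
}
#endif /* MD5_EXAMPLE */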
#endif /* !_KERNEL */

#if !defined(__amd64)
/*
 * sparc register window optimization:
 *
 * `a', `b', `c', and `d' are passed into MD5Transform explicitly
 * since it increases the number of registers available to the
 * compiler.  under this scheme, these variables can be held in
 * %i0 - %i3, which leaves more local and out registers available.
 */

/*
 * MD5Transform()
 *
 * purpose: md5 transformation -- updates the digest based on `block'
 *   input: uint32_t	: bytes  1 -  4 of the digest
 *          uint32_t	: bytes  5 -  8 of the digest
 *          uint32_t	: bytes  9 - 12 of the digest
 *          uint32_t	: bytes 13 - 16 of the digest
 *          MD5_CTX *	: the context to update
 *          uint8_t [64]: the block to use to update the digest
 *  output: void
 */

static void
MD5Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
    MD5_CTX *ctx, const uint8_t block[64])
{
	/*
	 * general optimization:
	 *
	 * use individual integers instead of using an array.  this is a
	 * win, although the amount it wins by seems to vary quite a bit.
	 */

	register uint32_t x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
	register uint32_t x_8, x_9, x_10, x_11, x_12, x_13, x_14, x_15;
#ifdef sun4v
	unsigned long long *md5_consts64;

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	md5_consts64 = (unsigned long long *) md5_consts;
#endif /* sun4v */

	/*
	 * general optimization:
	 *
	 * the compiler (at least SC4.2/5.x) generates better code if
	 * variable use is localized.  in this case, swapping the integers in
	 * this order allows `x_0' to be swapped nearest to its first use in
	 * FF(), and likewise for `x_1' and up.  note that the compiler
	 * prefers this to doing each swap right before the FF() that
	 * uses it.
	 */

	/*
	 * sparc v9/v8plus optimization:
	 *
	 * if `block' is already aligned on a 4-byte boundary, use the
	 * optimized load_little_32() directly.  otherwise, bcopy()
	 * into a buffer that *is* aligned on a 4-byte boundary and
	 * then do the load_little_32() on that buffer.  benchmarks
	 * have shown that using the bcopy() is better than loading
	 * the bytes individually and doing the endian-swap by hand.
	 *
	 * even though it's quite tempting to do:
	 *
	 * blk = bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
	 *
	 * and only have one set of LOAD_LITTLE_32()'s, the compiler (at least
	 * SC4.2/5.x) *does not* like that, so please resist the urge.
	 */

#ifdef _MD5_CHECK_ALIGNMENT
	if ((uintptr_t)block & 0x3) {		/* not 4-byte aligned? */
		bcopy(block, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));

#ifdef sun4v
		x_15 = LOAD_LITTLE_32_f(ctx->buf_un.buf32);
		x_14 = LOAD_LITTLE_32_e(ctx->buf_un.buf32);
		x_13 = LOAD_LITTLE_32_d(ctx->buf_un.buf32);
		x_12 = LOAD_LITTLE_32_c(ctx->buf_un.buf32);
		x_11 = LOAD_LITTLE_32_b(ctx->buf_un.buf32);
		x_10 = LOAD_LITTLE_32_a(ctx->buf_un.buf32);
		x_9 = LOAD_LITTLE_32_9(ctx->buf_un.buf32);
		x_8 = LOAD_LITTLE_32_8(ctx->buf_un.buf32);
		x_7 = LOAD_LITTLE_32_7(ctx->buf_un.buf32);
		x_6 = LOAD_LITTLE_32_6(ctx->buf_un.buf32);
		x_5 = LOAD_LITTLE_32_5(ctx->buf_un.buf32);
		x_4 = LOAD_LITTLE_32_4(ctx->buf_un.buf32);
		x_3 = LOAD_LITTLE_32_3(ctx->buf_un.buf32);
		x_2 = LOAD_LITTLE_32_2(ctx->buf_un.buf32);
		x_1 = LOAD_LITTLE_32_1(ctx->buf_un.buf32);
		x_0 = LOAD_LITTLE_32_0(ctx->buf_un.buf32);
#else
		x_15 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 15);
		x_14 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 14);
		x_13 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 13);
		x_12 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 12);
		x_11 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 11);
		x_10 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 10);
		x_9 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 9);
		x_8 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 8);
		x_7 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 7);
		x_6 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 6);
		x_5 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 5);
		x_4 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 4);
		x_3 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 3);
		x_2 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 2);
		x_1 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 1);
		x_0 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 0);
#endif /* sun4v */
	} else
#endif
	{

#ifdef sun4v
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_15 = LOAD_LITTLE_32_f(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_14 = LOAD_LITTLE_32_e(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_13 = LOAD_LITTLE_32_d(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_12 = LOAD_LITTLE_32_c(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_11 = LOAD_LITTLE_32_b(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_10 = LOAD_LITTLE_32_a(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_9 = LOAD_LITTLE_32_9(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_8 = LOAD_LITTLE_32_8(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_7 = LOAD_LITTLE_32_7(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_6 = LOAD_LITTLE_32_6(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_5 = LOAD_LITTLE_32_5(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_4 = LOAD_LITTLE_32_4(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_3 = LOAD_LITTLE_32_3(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_2 = LOAD_LITTLE_32_2(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_1 = LOAD_LITTLE_32_1(block);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_0 = LOAD_LITTLE_32_0(block);
#else
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_15 = LOAD_LITTLE_32(block + 60);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_14 = LOAD_LITTLE_32(block + 56);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_13 = LOAD_LITTLE_32(block + 52);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_12 = LOAD_LITTLE_32(block + 48);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_11 = LOAD_LITTLE_32(block + 44);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_10 = LOAD_LITTLE_32(block + 40);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_9 = LOAD_LITTLE_32(block + 36);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_8 = LOAD_LITTLE_32(block + 32);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_7 = LOAD_LITTLE_32(block + 28);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_6 = LOAD_LITTLE_32(block + 24);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_5 = LOAD_LITTLE_32(block + 20);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_4 = LOAD_LITTLE_32(block + 16);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_3 = LOAD_LITTLE_32(block + 12);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_2 = LOAD_LITTLE_32(block + 8);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_1 = LOAD_LITTLE_32(block + 4);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		x_0 = LOAD_LITTLE_32(block + 0);
#endif /* sun4v */
	}
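	/*
	 * For reference (an assumption about md5_byteswap.h's contract,
	 * inferred from the usage above): LOAD_LITTLE_32(p) yields the
	 * 32-bit little-endian word at p, i.e. roughly
	 *
	 *	(uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	 *	((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24)
	 *
	 * on a big-endian machine, and a plain 4-byte load on a
	 * little-endian one.
	 */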
	/* round 1 */
	FF(a, b, c, d, x_0, MD5_SHIFT_11, MD5_CONST_e(0));	/* 1 */
	FF(d, a, b, c, x_1, MD5_SHIFT_12, MD5_CONST_o(1));	/* 2 */
	FF(c, d, a, b, x_2, MD5_SHIFT_13, MD5_CONST_e(2));	/* 3 */
	FF(b, c, d, a, x_3, MD5_SHIFT_14, MD5_CONST_o(3));	/* 4 */
	FF(a, b, c, d, x_4, MD5_SHIFT_11, MD5_CONST_e(4));	/* 5 */
	FF(d, a, b, c, x_5, MD5_SHIFT_12, MD5_CONST_o(5));	/* 6 */
	FF(c, d, a, b, x_6, MD5_SHIFT_13, MD5_CONST_e(6));	/* 7 */
	FF(b, c, d, a, x_7, MD5_SHIFT_14, MD5_CONST_o(7));	/* 8 */
	FF(a, b, c, d, x_8, MD5_SHIFT_11, MD5_CONST_e(8));	/* 9 */
	FF(d, a, b, c, x_9, MD5_SHIFT_12, MD5_CONST_o(9));	/* 10 */
	FF(c, d, a, b, x_10, MD5_SHIFT_13, MD5_CONST_e(10));	/* 11 */
	FF(b, c, d, a, x_11, MD5_SHIFT_14, MD5_CONST_o(11));	/* 12 */
	FF(a, b, c, d, x_12, MD5_SHIFT_11, MD5_CONST_e(12));	/* 13 */
	FF(d, a, b, c, x_13, MD5_SHIFT_12, MD5_CONST_o(13));	/* 14 */
	FF(c, d, a, b, x_14, MD5_SHIFT_13, MD5_CONST_e(14));	/* 15 */
	FF(b, c, d, a, x_15, MD5_SHIFT_14, MD5_CONST_o(15));	/* 16 */

	/* round 2 */
	GG(a, b, c, d, x_1, MD5_SHIFT_21, MD5_CONST_e(16));	/* 17 */
	GG(d, a, b, c, x_6, MD5_SHIFT_22, MD5_CONST_o(17));	/* 18 */
	GG(c, d, a, b, x_11, MD5_SHIFT_23, MD5_CONST_e(18));	/* 19 */
	GG(b, c, d, a, x_0, MD5_SHIFT_24, MD5_CONST_o(19));	/* 20 */
	GG(a, b, c, d, x_5, MD5_SHIFT_21, MD5_CONST_e(20));	/* 21 */
	GG(d, a, b, c, x_10, MD5_SHIFT_22, MD5_CONST_o(21));	/* 22 */
	GG(c, d, a, b, x_15, MD5_SHIFT_23, MD5_CONST_e(22));	/* 23 */
	GG(b, c, d, a, x_4, MD5_SHIFT_24, MD5_CONST_o(23));	/* 24 */
	GG(a, b, c, d, x_9, MD5_SHIFT_21, MD5_CONST_e(24));	/* 25 */
	GG(d, a, b, c, x_14, MD5_SHIFT_22, MD5_CONST_o(25));	/* 26 */
	GG(c, d, a, b, x_3, MD5_SHIFT_23, MD5_CONST_e(26));	/* 27 */
	GG(b, c, d, a, x_8, MD5_SHIFT_24, MD5_CONST_o(27));	/* 28 */
	GG(a, b, c, d, x_13, MD5_SHIFT_21, MD5_CONST_e(28));	/* 29 */
	GG(d, a, b, c, x_2, MD5_SHIFT_22, MD5_CONST_o(29));	/* 30 */
	GG(c, d, a, b, x_7, MD5_SHIFT_23, MD5_CONST_e(30));	/* 31 */
	GG(b, c, d, a, x_12, MD5_SHIFT_24, MD5_CONST_o(31));	/* 32 */
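	/*
	 * A note on the word order above and below (per RFC 1321): with
	 * k = 0..15 the step within a round, round 1 uses x_k in index
	 * order, round 2 uses x_((1 + 5k) mod 16), round 3 uses
	 * x_((5 + 3k) mod 16), and round 4 uses x_(7k mod 16), so every
	 * round touches each message word exactly once.
	 */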
	/* round 3 */
	HH(a, b, c, d, x_5, MD5_SHIFT_31, MD5_CONST_e(32));	/* 33 */
	HH(d, a, b, c, x_8, MD5_SHIFT_32, MD5_CONST_o(33));	/* 34 */
	HH(c, d, a, b, x_11, MD5_SHIFT_33, MD5_CONST_e(34));	/* 35 */
	HH(b, c, d, a, x_14, MD5_SHIFT_34, MD5_CONST_o(35));	/* 36 */
	HH(a, b, c, d, x_1, MD5_SHIFT_31, MD5_CONST_e(36));	/* 37 */
	HH(d, a, b, c, x_4, MD5_SHIFT_32, MD5_CONST_o(37));	/* 38 */
	HH(c, d, a, b, x_7, MD5_SHIFT_33, MD5_CONST_e(38));	/* 39 */
	HH(b, c, d, a, x_10, MD5_SHIFT_34, MD5_CONST_o(39));	/* 40 */
	HH(a, b, c, d, x_13, MD5_SHIFT_31, MD5_CONST_e(40));	/* 41 */
	HH(d, a, b, c, x_0, MD5_SHIFT_32, MD5_CONST_o(41));	/* 42 */
	HH(c, d, a, b, x_3, MD5_SHIFT_33, MD5_CONST_e(42));	/* 43 */
	HH(b, c, d, a, x_6, MD5_SHIFT_34, MD5_CONST_o(43));	/* 44 */
	HH(a, b, c, d, x_9, MD5_SHIFT_31, MD5_CONST_e(44));	/* 45 */
	HH(d, a, b, c, x_12, MD5_SHIFT_32, MD5_CONST_o(45));	/* 46 */
	HH(c, d, a, b, x_15, MD5_SHIFT_33, MD5_CONST_e(46));	/* 47 */
	HH(b, c, d, a, x_2, MD5_SHIFT_34, MD5_CONST_o(47));	/* 48 */

	/* round 4 */
	II(a, b, c, d, x_0, MD5_SHIFT_41, MD5_CONST_e(48));	/* 49 */
	II(d, a, b, c, x_7, MD5_SHIFT_42, MD5_CONST_o(49));	/* 50 */
	II(c, d, a, b, x_14, MD5_SHIFT_43, MD5_CONST_e(50));	/* 51 */
	II(b, c, d, a, x_5, MD5_SHIFT_44, MD5_CONST_o(51));	/* 52 */
	II(a, b, c, d, x_12, MD5_SHIFT_41, MD5_CONST_e(52));	/* 53 */
	II(d, a, b, c, x_3, MD5_SHIFT_42, MD5_CONST_o(53));	/* 54 */
	II(c, d, a, b, x_10, MD5_SHIFT_43, MD5_CONST_e(54));	/* 55 */
	II(b, c, d, a, x_1, MD5_SHIFT_44, MD5_CONST_o(55));	/* 56 */
	II(a, b, c, d, x_8, MD5_SHIFT_41, MD5_CONST_e(56));	/* 57 */
	II(d, a, b, c, x_15, MD5_SHIFT_42, MD5_CONST_o(57));	/* 58 */
	II(c, d, a, b, x_6, MD5_SHIFT_43, MD5_CONST_e(58));	/* 59 */
	II(b, c, d, a, x_13, MD5_SHIFT_44, MD5_CONST_o(59));	/* 60 */
	II(a, b, c, d, x_4, MD5_SHIFT_41, MD5_CONST_e(60));	/* 61 */
	II(d, a, b, c, x_11, MD5_SHIFT_42, MD5_CONST_o(61));	/* 62 */
	II(c, d, a, b, x_2, MD5_SHIFT_43, MD5_CONST_e(62));	/* 63 */
	II(b, c, d, a, x_9, MD5_SHIFT_44, MD5_CONST_o(63));	/* 64 */

	ctx->state[0] += a;
	ctx->state[1] += b;
	ctx->state[2] += c;
	ctx->state[3] += d;

	/*
	 * zeroize sensitive information -- compiler will optimize
	 * this out if everything is kept in registers
	 */

	x_0 = x_1 = x_2 = x_3 = x_4 = x_5 = x_6 = x_7 = x_8 = 0;
	x_9 = x_10 = x_11 = x_12 = x_13 = x_14 = x_15 = 0;
}
#endif /* !defined(__amd64) */

/*
 * Encode()
 *
 * purpose: to convert a list of numbers from host byte order to little
 *          endian
 *   input: uint8_t *	: place to store the converted little endian numbers
 *          uint32_t *	: place to get numbers to convert from
 *          size_t	: the length of the input in bytes
 *  output: void
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
    size_t input_len)
{
	size_t i, j;

	for (i = 0, j = 0; j < input_len; i++, j += sizeof (uint32_t)) {

#ifdef _LITTLE_ENDIAN

#ifdef _MD5_CHECK_ALIGNMENT
		if ((uintptr_t)output & 0x3)	/* not 4-byte aligned */
			bcopy(input + i, output + j, 4);
		else
			*(uint32_t *)(output + j) = input[i];
#else
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		*(uint32_t *)(output + j) = input[i];
#endif /* _MD5_CHECK_ALIGNMENT */

#else /* big endian -- will work on little endian, but slowly */

		output[j] = input[i] & 0xff;
		output[j + 1] = (input[i] >> 8) & 0xff;
		output[j + 2] = (input[i] >> 16) & 0xff;
		output[j + 3] = (input[i] >> 24) & 0xff;
#endif
	}
}
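/*
 * Worked example of Encode() as used by MD5Final() (illustrative): for a
 * 3-byte message, ctx->count holds { 24, 0 } (24 bits), so the 8-byte
 * length field appended to the padded message is 18 00 00 00 00 00 00 00.
 */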