Lines Matching +full:sub +full:- +full:block

2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
83 movdqa (\i-32)*4(SHA256CONSTANTS), MSG
100 * Intel SHA Extensions optimized implementation of a SHA-256 block function
102 * This function takes a pointer to the current SHA-256 state, a pointer to the
103 * input data, and the number of 64-byte blocks to process. Once all blocks
120 * DCBA, HGFE -> ABEF, CDGH
223 // Do 4 rounds of SHA-256 for each of two messages (interleaved). m0_a and m0_b
228 // computes 4 more message schedule words for each message. m1_a-m3_a contain
230 // likewise m1_b-m3_b for the second. After consuming the current value of
236 movdqa (\i-32)*4(SHA256CONSTANTS), TMP_A
270 // This function computes the SHA-256 digests of two messages |data1| and
274 // The instructions for the two SHA-256 operations are interleaved. On many
276 // to taking better advantage of the CPU's SHA-256 and SIMD throughput.
279 // Allocate 128 bytes of stack space, 16-byte aligned.
283 sub $128, %rsp
286 // Load the shuffle mask for swapping the endianness of 32-bit words.
295 // Load the initial state from ctx->state.
304 // Load ctx->bytecount. Take the mod 64 of it to get the number of
305 // bytes that are buffered in ctx->buf. Also save it in a register with
313 // %ebx bytes (1 to 63) are currently buffered in ctx->buf. Load them
314 // followed by the first 64 - %ebx bytes of data. Since LEN >= 64, we
315 // just load 64 bytes from each of ctx->buf, DATA1, and DATA2
353 sub $64, %rbx // rbx = buffered - 64
354 sub %rbx, DATA1 // DATA1 += 64 - buffered
355 sub %rbx, DATA2 // DATA2 += 64 - buffered
356 add %ebx, LEN // LEN += buffered - 64
362 sub $64, LEN
389 // Save the original state for each block.
395 // Do the SHA-256 rounds on each block.
407 // Add the original state for each block.
414 sub $64, LEN
419 // FINAL_STEP = 1: need to do count-only padding block
420 // FINAL_STEP = 0: need to do the block with 0x80 padding byte
427 // Not block-aligned; 1 <= LEN <= 63 data bytes remain. Pad the block.
430 // and load from &sp[64 - LEN] to get the needed padding block. This
433 sub LEN, %ebx // ebx = 64 - LEN
434 sub %rbx, DATA1 // DATA1 -= 64 - LEN
435 sub %rbx, DATA2 // DATA2 -= 64 - LEN
444 jge 1f // will COUNT spill into its own block?
448 mov $2, FINAL_STEP // won't need count-only block
451 mov $1, FINAL_STEP // will need count-only block
480 // Prepare a padding block, either:
483 // This is for a block aligned message.
488 // Pre-swap the endianness of the words.