Lines Matching +full:sub +full:- +full:message
2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
83 movdqa (\i-32)*4(SHA256CONSTANTS), MSG
100 * Intel SHA Extensions optimized implementation of a SHA-256 block function
102 * This function takes a pointer to the current SHA-256 state, a pointer to the
103 * input data, and the number of 64-byte blocks to process. Once all blocks
120 * DCBA, HGFE -> ABEF, CDGH
223 // Do 4 rounds of SHA-256 for each of two messages (interleaved). m0_a and m0_b
224 // contain the current 4 message schedule words for the first and second message
227 // If not all the message schedule words have been computed yet, then this also
228 // computes 4 more message schedule words for each message. m1_a-m3_a contain
229 // the next 3 groups of 4 message schedule words for the first message, and
230 // likewise m1_b-m3_b for the second. After consuming the current value of
236 movdqa (\i-32)*4(SHA256CONSTANTS), TMP_A
270 // This function computes the SHA-256 digests of two messages |data1| and
274 // The instructions for the two SHA-256 operations are interleaved. On many
275 // CPUs, this is almost twice as fast as hashing each message individually due
276 // to taking better advantage of the CPU's SHA-256 and SIMD throughput.
279 // Allocate 128 bytes of stack space, 16-byte aligned.
283 sub $128, %rsp
286 // Load the shuffle mask for swapping the endianness of 32-bit words.
295 // Load the initial state from ctx->state.
304 // Load ctx->bytecount. Take the mod 64 of it to get the number of
305 // bytes that are buffered in ctx->buf. Also save it in a register with
313 // %ebx bytes (1 to 63) are currently buffered in ctx->buf. Load them
314 // followed by the first 64 - %ebx bytes of data. Since LEN >= 64, we
315 // just load 64 bytes from each of ctx->buf, DATA1, and DATA2
353 sub $64, %rbx // rbx = buffered - 64
354 sub %rbx, DATA1 // DATA1 += 64 - buffered
355 sub %rbx, DATA2 // DATA2 += 64 - buffered
356 add %ebx, LEN // LEN += buffered - 64
362 sub $64, LEN
395 // Do the SHA-256 rounds on each block.
414 sub $64, LEN
419 // FINAL_STEP = 1: need to do count-only padding block
427 // Not block-aligned; 1 <= LEN <= 63 data bytes remain. Pad the block.
429 // &sp[64]. Then for each message, copy the last 64 data bytes to sp
430 // and load from &sp[64 - LEN] to get the needed padding block. This
433 sub LEN, %ebx // ebx = 64 - LEN
434 sub %rbx, DATA1 // DATA1 -= 64 - LEN
435 sub %rbx, DATA2 // DATA2 -= 64 - LEN
448 mov $2, FINAL_STEP // won't need count-only block
451 mov $1, FINAL_STEP // will need count-only block
483 // This is for a block aligned message.
486 // This is for a message whose length mod 64 is >= 56.
488 // Pre-swap the endianness of the words.