sha256-ni-asm.S - OpenGrok cross reference for /linux/lib/crypto/x86/sha256-ni-asm.S

Lines Matching +full:sub +full:- +full:message
2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
83 	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
100  * Intel SHA Extensions optimized implementation of a SHA-256 block function
102  * This function takes a pointer to the current SHA-256 state, a pointer to the
103  * input data, and the number of 64-byte blocks to process.  Once all blocks
120 	 * DCBA, HGFE -> ABEF, CDGH
223 // Do 4 rounds of SHA-256 for each of two messages (interleaved).  m0_a and m0_b
224 // contain the current 4 message schedule words for the first and second message
227 // If not all the message schedule words have been computed yet, then this also
228 // computes 4 more message schedule words for each message.  m1_a-m3_a contain
229 // the next 3 groups of 4 message schedule words for the first message, and
230 // likewise m1_b-m3_b for the second.  After consuming the current value of
236 	movdqa		(\i-32)*4(SHA256CONSTANTS), TMP_A
270 // This function computes the SHA-256 digests of two messages |data1| and
274 // The instructions for the two SHA-256 operations are interleaved.  On many
275 // CPUs, this is almost twice as fast as hashing each message individually due
276 // to taking better advantage of the CPU's SHA-256 and SIMD throughput.
279 	// Allocate 128 bytes of stack space, 16-byte aligned.
283 	sub		$128, %rsp
286 	// Load the shuffle mask for swapping the endianness of 32-bit words.
295 	// Load the initial state from ctx->state.
304 	// Load ctx->bytecount.  Take the mod 64 of it to get the number of
305 	// bytes that are buffered in ctx->buf.  Also save it in a register with
313 	// %ebx bytes (1 to 63) are currently buffered in ctx->buf.  Load them
314 	// followed by the first 64 - %ebx bytes of data.  Since LEN >= 64, we
315 	// just load 64 bytes from each of ctx->buf, DATA1, and DATA2
353 	sub		$64, %rbx 	// rbx = buffered - 64
354 	sub		%rbx, DATA1	// DATA1 += 64 - buffered
355 	sub		%rbx, DATA2	// DATA2 += 64 - buffered
356 	add		%ebx, LEN	// LEN += buffered - 64
362 	sub		$64, LEN
395 	// Do the SHA-256 rounds on each block.
414 	sub		$64, LEN
419 	// FINAL_STEP = 1: need to do count-only padding block
427 	// Not block-aligned; 1 <= LEN <= 63 data bytes remain.  Pad the block.
429 	// &sp[64].  Then for each message, copy the last 64 data bytes to sp
430 	// and load from &sp[64 - LEN] to get the needed padding block.  This
433 	sub		LEN, %ebx		// ebx = 64 - LEN
434 	sub		%rbx, DATA1		// DATA1 -= 64 - LEN
435 	sub		%rbx, DATA2		// DATA2 -= 64 - LEN
448 	mov		$2, FINAL_STEP	// won't need count-only block
451 	mov		$1, FINAL_STEP	// will need count-only block
483 	//	This is for a block-aligned message.
486 	//	This is for a message whose length mod 64 is >= 56.
488 	// Pre-swap the endianness of the words.