sha256-ni-asm.S - OpenGrok cross reference for /linux/lib/crypto/x86/sha256-ni-asm.S

Lines Matching +full:sub +full:- +full:block
2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
83 	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
100  * Intel SHA Extensions optimized implementation of a SHA-256 block function
102  * This function takes a pointer to the current SHA-256 state, a pointer to the
103  * input data, and the number of 64-byte blocks to process.  Once all blocks
120 	 * DCBA, HGFE -> ABEF, CDGH
223 // Do 4 rounds of SHA-256 for each of two messages (interleaved).  m0_a and m0_b
228 // computes 4 more message schedule words for each message.  m1_a-m3_a contain
230 // likewise m1_b-m3_b for the second.  After consuming the current value of
236 	movdqa		(\i-32)*4(SHA256CONSTANTS), TMP_A
270 // This function computes the SHA-256 digests of two messages |data1| and
274 // The instructions for the two SHA-256 operations are interleaved.  On many
276 // to taking better advantage of the CPU's SHA-256 and SIMD throughput.
279 	// Allocate 128 bytes of stack space, 16-byte aligned.
283 	sub		$128, %rsp
286 	// Load the shuffle mask for swapping the endianness of 32-bit words.
295 	// Load the initial state from ctx->state.
304 	// Load ctx->bytecount.  Take the mod 64 of it to get the number of
305 	// bytes that are buffered in ctx->buf.  Also save it in a register with
313 	// %ebx bytes (1 to 63) are currently buffered in ctx->buf.  Load them
314 	// followed by the first 64 - %ebx bytes of data.  Since LEN >= 64, we
315 	// just load 64 bytes from each of ctx->buf, DATA1, and DATA2
353 	sub		$64, %rbx 	// rbx = buffered - 64
354 	sub		%rbx, DATA1	// DATA1 += 64 - buffered
355 	sub		%rbx, DATA2	// DATA2 += 64 - buffered
356 	add		%ebx, LEN	// LEN += buffered - 64
362 	sub		$64, LEN
389 	// Save the original state for each block.
395 	// Do the SHA-256 rounds on each block.
407 	// Add the original state for each block.
414 	sub		$64, LEN
419 	// FINAL_STEP = 1: need to do count-only padding block
420 	// FINAL_STEP = 0: need to do the block with 0x80 padding byte
427 	// Not block-aligned; 1 <= LEN <= 63 data bytes remain.  Pad the block.
430 	// and load from &sp[64 - LEN] to get the needed padding block.  This
433 	sub		LEN, %ebx		// ebx = 64 - LEN
434 	sub		%rbx, DATA1		// DATA1 -= 64 - LEN
435 	sub		%rbx, DATA2		// DATA2 -= 64 - LEN
444 	jge		1f	// will COUNT spill into its own block?
448 	mov		$2, FINAL_STEP	// won't need count-only block
451 	mov		$1, FINAL_STEP	// will need count-only block
480 	// Prepare a padding block, either:
483 	//	This is for a block aligned message.
488 	// Pre-swap the endianness of the words.