Lines Matching +full:len +full:- +full:or +full:- +full:define

2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
4 * This file is provided under a dual BSD/GPLv2 license. When using or
11 * This program is free software; you can redistribute it and/or modify
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Redistribution and use in source and binary forms, with or without
36 * the documentation and/or other materials provided with the
39 * contributors may be used to endorse or promote products derived
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
58 #define STATE_PTR %rdi /* 1st arg */
59 #define DATA_PTR %rsi /* 2nd arg */
60 #define NUM_BLKS %rdx /* 3rd arg */
62 #define SHA256CONSTANTS %rax
64 #define MSG %xmm0 /* sha256rnds2 implicit operand */
65 #define STATE0 %xmm1
66 #define STATE1 %xmm2
67 #define MSG0 %xmm3
68 #define MSG1 %xmm4
69 #define MSG2 %xmm5
70 #define MSG3 %xmm6
71 #define TMP %xmm7
73 #define SHUF_MASK %xmm8
75 #define ABEF_SAVE %xmm9
76 #define CDGH_SAVE %xmm10
83 movdqa (\i-32)*4(SHA256CONSTANTS), MSG
100 * Intel SHA Extensions optimized implementation of a SHA-256 block function
102 * This function takes a pointer to the current SHA-256 state, a pointer to the
103 * input data, and the number of 64-byte blocks to process. Once all blocks
120 * DCBA, HGFE -> ABEF, CDGH
185 #define CTX %rdi
186 #define DATA1 %rsi
187 #define DATA2 %rdx
188 #define LEN %ecx
189 #define LEN8 %cl
190 #define LEN64 %rcx
191 #define OUT1 %r8
192 #define OUT2 %r9
195 #define SHA256CONSTANTS %rax
196 #define COUNT %r10
197 #define COUNT32 %r10d
198 #define FINAL_STEP %r11d
202 #define MSG %xmm0 // sha256rnds2 implicit operand
203 #define STATE0_A %xmm1
204 #define STATE1_A %xmm2
205 #define STATE0_B %xmm3
206 #define STATE1_B %xmm4
207 #define TMP_A %xmm5
208 #define TMP_B %xmm6
209 #define MSG0_A %xmm7
210 #define MSG1_A %xmm8
211 #define MSG2_A %xmm9
212 #define MSG3_A %xmm10
213 #define MSG0_B %xmm11
214 #define MSG1_B %xmm12
215 #define MSG2_B %xmm13
216 #define MSG3_B %xmm14
217 #define SHUF_MASK %xmm15
219 #define OFFSETOF_STATE 0 // offsetof(struct __sha256_ctx, state)
220 #define OFFSETOF_BYTECOUNT 32 // offsetof(struct __sha256_ctx, bytecount)
221 #define OFFSETOF_BUF 40 // offsetof(struct __sha256_ctx, buf)
223 // Do 4 rounds of SHA-256 for each of two messages (interleaved). m0_a and m0_b
228 // computes 4 more message schedule words for each message. m1_a-m3_a contain
230 // likewise m1_b-m3_b for the second. After consuming the current value of
236 movdqa (\i-32)*4(SHA256CONSTANTS), TMP_A
266 // const u8 *data1, const u8 *data2, int len,
270 // This function computes the SHA-256 digests of two messages |data1| and
271 // |data2| that are both |len| bytes long, starting from the initial context
272 // |ctx|. |len| must be at least SHA256_BLOCK_SIZE.
274 // The instructions for the two SHA-256 operations are interleaved. On many
276 // to taking better advantage of the CPU's SHA-256 and SIMD throughput.
279 // Allocate 128 bytes of stack space, 16-byte aligned.
286 // Load the shuffle mask for swapping the endianness of 32-bit words.
295 // Load the initial state from ctx->state.
304 // Load ctx->bytecount. Take the mod 64 of it to get the number of
305 // bytes that are buffered in ctx->buf. Also save it in a register with
306 // LEN added to it.
307 mov LEN, LEN
313 // %ebx bytes (1 to 63) are currently buffered in ctx->buf. Load them
314 // followed by the first 64 - %ebx bytes of data. Since LEN >= 64, we
315 // just load 64 bytes from each of ctx->buf, DATA1, and DATA2
353 sub $64, %rbx // rbx = buffered - 64
354 sub %rbx, DATA1 // DATA1 += 64 - buffered
355 sub %rbx, DATA2 // DATA2 += 64 - buffered
356 add %ebx, LEN // LEN += buffered - 64
362 sub $64, LEN
395 // Do the SHA-256 rounds on each block.
413 // Update LEN and loop back if more blocks remain.
414 sub $64, LEN
419 // FINAL_STEP = 1: need to do count-only padding block
424 add $64, LEN
427 // Not block-aligned; 1 <= LEN <= 63 data bytes remain. Pad the block.
430 // and load from &sp[64 - LEN] to get the needed padding block. This
433 sub LEN, %ebx // ebx = 64 - LEN
434 sub %rbx, DATA1 // DATA1 -= 64 - LEN
435 sub %rbx, DATA2 // DATA2 -= 64 - LEN
443 cmp $56, LEN
448 mov $2, FINAL_STEP // won't need count-only block
451 mov $1, FINAL_STEP // will need count-only block
488 // Pre-swap the endianness of the words.