Lines Matching +full:sub +full:- +full:block
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
12 .arch armv8-a
13 .fpu crypto-neon-fp-armv8
102 vld1.32 {q10-q11}, [ip]!
104 vld1.32 {q12-q13}, [ip]!
106 vld1.32 {q10-q11}, [ip]!
108 vld1.32 {q12-q13}, [ip]!
110 blo 0f @ AES-128: 10 rounds
111 vld1.32 {q10-q11}, [ip]!
113 beq 1f @ AES-192: 12 rounds
114 vld1.32 {q12-q13}, [ip]
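The blo/beq branches above pick 10, 12, or 14 rounds from the key length (AES-128/192/256). A minimal C sketch of that relationship, Nr = Nk + 6 with Nk the key length in 32-bit words; the function name is illustrative, not part of this file:

static int aes_rounds(unsigned int key_len_bytes)
{
        /* 16 -> 10 rounds, 24 -> 12, 32 -> 14 (Nr = Nk + 6) */
        return key_len_bytes / 4 + 6;
}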
124 * Internal, non-AAPCS compliant functions that implement the core AES
125  * transforms. These should preserve all registers except q0 - q3 and ip
127 * q0 : first in/output block
128 * q1 : second in/output block (_4x version only)
129 * q2 : third in/output block (_4x version only)
130 * q3 : fourth in/output block (_4x version only)
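For contrast with these internal, register-based helpers, the AAPCS entry points are declared on the C side (arch/arm/crypto/aes-ce-glue.c). A hedged sketch of two such declarations, approximated from the glue code rather than quoted verbatim:

asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                   int rounds, int blocks);
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                   int rounds, int blocks, u8 iv[]);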
164 vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
181 vld1.8 {q0-q1}, [r1]!
182 vld1.8 {q2-q3}, [r1]!
184 vst1.8 {q0-q1}, [r0]!
185 vst1.8 {q2-q3}, [r0]!
207 vld1.8 {q0-q1}, [r1]!
208 vld1.8 {q2-q3}, [r1]!
210 vst1.8 {q0-q1}, [r0]!
211 vst1.8 {q2-q3}, [r0]!
233 push {r4-r6, lr}
238 vld1.8 {q1}, [r1]! @ get next pt block
245 pop {r4-r6, pc}
249 push {r4-r6, lr}
256 vld1.8 {q0-q1}, [r1]!
257 vld1.8 {q2-q3}, [r1]!
268 vst1.8 {q0-q1}, [r0]!
269 vst1.8 {q2-q3}, [r0]!
276 vld1.8 {q0}, [r1]! @ get next ct block
285 pop {r4-r6, pc}
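Note the asymmetry between the two CBC paths above: encryption fetches one plaintext block per iteration, while decryption loads four ciphertext blocks at once. That is inherent to CBC, since each plaintext block must be XORed with the previous ciphertext before it can be encrypted, whereas all block decryptions are independent. A cipher-agnostic C sketch of the serial encrypt loop; block_fn and all names here are illustrative stand-ins, not the kernel API:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

typedef void (*block_fn)(uint8_t out[16], const uint8_t in[16]);

static void cbc_encrypt(uint8_t *dst, const uint8_t *src, size_t blocks,
                        uint8_t iv[16], block_fn encrypt)
{
        while (blocks--) {
                for (int i = 0; i < 16; i++)
                        iv[i] ^= src[i];        /* chain: pt XOR previous ct */
                encrypt(iv, iv);                /* iv now holds the new ct block */
                memcpy(dst, iv, 16);
                src += 16;
                dst += 16;
        }
}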
297 push {r4-r6, lr}
302 sub r4, r4, #16
305 sub lr, lr, r4
319 vtbl.8 d4, {d0-d1}, d10
320 vtbl.8 d5, {d0-d1}, d11
321 vtbl.8 d2, {d6-d7}, d12
322 vtbl.8 d3, {d6-d7}, d13
331 pop {r4-r6, pc}
335 push {r4-r6, lr}
340 sub r4, r4, #16
343 sub lr, lr, r4
356 vtbl.8 d4, {d0-d1}, d10
357 vtbl.8 d5, {d0-d1}, d11
358 vtbx.8 d0, {d2-d3}, d12
359 vtbx.8 d1, {d2-d3}, d13
369 pop {r4-r6, pc}
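The vtbl/vtbx sequences at lines 319-322 and 356-359 shuffle bytes of the last two blocks for ciphertext stealing. A simplified C model of the two NEON instructions, widened to a 16-byte table for readability (the real instructions work on 64-bit d registers): vtbl writes 0 for an out-of-range index, while vtbx leaves that destination byte unchanged.

#include <stdint.h>

static void vtbl16(uint8_t out[16], const uint8_t tbl[16],
                   const uint8_t idx[16])
{
        for (int i = 0; i < 16; i++)
                out[i] = idx[i] < 16 ? tbl[idx[i]] : 0; /* vtbl: 0-fill */
}

static void vtbx16(uint8_t out[16], const uint8_t tbl[16],
                   const uint8_t idx[16])
{
        for (int i = 0; i < 16; i++)
                if (idx[i] < 16)
                        out[i] = tbl[idx[i]];   /* vtbx: keep on miss */
}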
378 push {r4-r6, lr}
392 * a silicon erratum that exists in Cortex-A57 (#1742098) and
393 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
395 * register of which a single 32-bit lane has been updated the last
397 * q0-q3 below are not manipulated individually, and the different
415 vld1.8 {q4-q5}, [r1]!
424 vst1.8 {q0-q1}, [r0]!
425 vst1.8 {q2-q3}, [r0]!
442 bmi .Lctrtailblock @ blocks < 0 means tail block
450 pop {r4-r6, pc}
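The comment at line 442 encodes the tail convention for CTR: a negative block count marks a final partial block, of which only part of the last keystream block is consumed. A hedged, cipher-agnostic sketch of CTR with such a tail (block_fn as in the CBC sketch; names illustrative):

#include <stdint.h>
#include <stddef.h>

typedef void (*block_fn)(uint8_t out[16], const uint8_t in[16]);

static void ctr_inc(uint8_t ctr[16])    /* big-endian increment */
{
        for (int i = 15; i >= 0; i--)
                if (++ctr[i])
                        break;
}

static void ctr_crypt(uint8_t *dst, const uint8_t *src, size_t len,
                      uint8_t ctr[16], block_fn encrypt)
{
        uint8_t ks[16];

        while (len) {
                size_t n = len < 16 ? len : 16; /* tail may be short */

                encrypt(ks, ctr);
                ctr_inc(ctr);
                for (size_t i = 0; i < n; i++)
                        dst[i] = src[i] ^ ks[i];
                dst += n;
                src += n;
                len -= n;
        }
}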
491 teq r6, #1 @ start of a block?
495 @ be done at the start of a block.
503 push {r4-r6, lr}
509 teq r6, #0 @ start of a block?
517 vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
518 vld1.8 {q2-q3}, [r1]!
531 vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
532 vst1.8 {q2-q3}, [r0]!
559 pop {r4-r6, pc}
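The "start of a block?" tests above appear to guard one-time tweak setup; between consecutive 16-byte blocks, XTS advances the tweak by a doubling in GF(2^128). A C sketch of that standard step, using the little-endian byte order and 0x87 feedback conventional for XTS (names illustrative):

#include <stdint.h>

static void xts_tweak_double(uint8_t t[16])
{
        uint8_t carry = 0;

        for (int i = 0; i < 16; i++) {
                uint8_t msb = t[i] >> 7;

                t[i] = (uint8_t)(t[i] << 1) | carry;
                carry = msb;
        }
        if (carry)
                t[0] ^= 0x87;   /* x^128 = x^7 + x^2 + x + 1 */
}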
563 sub r0, r0, #16
569 add r4, r4, #16 @ # bytes in final block
572 sub lr, lr, r4
573 add r4, r0, r4 @ output address of final block
575 vld1.8 {q1}, [r1] @ load final partial block
579 vtbl.8 d4, {d0-d1}, d4
580 vtbl.8 d5, {d0-d1}, d5
581 vtbx.8 d0, {d2-d3}, d6
582 vtbx.8 d1, {d2-d3}, d7
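The arithmetic at lines 569-573 locates a final partial block of 1 to 15 bytes, and the vtbl/vtbx shuffle then performs the stealing swap. A cipher-agnostic sketch of the same data flow using plain memcpy (all names hypothetical): the truncated final ciphertext block is the head of the previous full ciphertext block, whose unused tail pads the partial plaintext before that padded block is encrypted in the previous block's place.

#include <stdint.h>
#include <string.h>

static void cts_shuffle(uint8_t pad[16], uint8_t out_tail[15],
                        const uint8_t prev_ct[16], const uint8_t pt_tail[15],
                        unsigned int tail)      /* 1..15 bytes */
{
        memcpy(out_tail, prev_ct, tail);        /* short final ct block */
        memcpy(pad, pt_tail, tail);             /* partial pt ... */
        memcpy(pad + tail, prev_ct + tail, 16 - tail); /* ... stolen pad */
}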
591 push {r4-r6, lr}
601 teq r6, #0 @ start of a block?
609 vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
610 vld1.8 {q2-q3}, [r1]!
623 vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
624 vst1.8 {q2-q3}, [r0]!
648 pop {r4-r6, pc}
655 add r4, r4, #16 @ # bytes in final block
658 sub lr, lr, r4
659 add r4, r0, r4 @ output address of final block
663 vld1.8 {q1}, [r1] @ load final partial block
671 vtbl.8 d4, {d0-d1}, d4
672 vtbl.8 d5, {d0-d1}, d5
673 vtbx.8 d0, {d2-d3}, d6
674 vtbx.8 d1, {d2-d3}, d7
682 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
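The comment is truncated here to its matching line; the signature indicates SubWord, i.e. the AES S-box applied to each byte of a 32-bit word, as used during key expansion (the aese instruction provides the SubBytes step in hardware). A self-contained C reference for what such a helper computes, deriving the S-box from the GF(2^8) inverse and affine transform instead of a table; all names are illustrative:

#include <stdint.h>

/* GF(2^8) multiply modulo x^8 + x^4 + x^3 + x + 1 (0x11b) */
static uint8_t gmul(uint8_t a, uint8_t b)
{
        uint8_t p = 0;

        while (b) {
                if (b & 1)
                        p ^= a;
                a = (uint8_t)(a << 1) ^ ((a & 0x80) ? 0x1b : 0);
                b >>= 1;
        }
        return p;
}

static uint8_t ginv(uint8_t x)  /* multiplicative inverse, ginv(0) = 0 */
{
        for (unsigned int y = 1; x && y < 256; y++)
                if (gmul(x, (uint8_t)y) == 1)
                        return (uint8_t)y;
        return 0;
}

static uint8_t sbox(uint8_t x)  /* inverse, then the affine transform */
{
        uint8_t b = ginv(x);
        uint8_t r = b ^ 0x63;

        for (int i = 1; i <= 4; i++)
                r ^= (uint8_t)((b << i) | (b >> (8 - i)));
        return r;
}

static uint32_t aes_sub_word(uint32_t w)        /* models ce_aes_sub() */
{
        return (uint32_t)sbox(w & 0xff) |
               (uint32_t)sbox((w >> 8) & 0xff) << 8 |
               (uint32_t)sbox((w >> 16) & 0xff) << 16 |
               (uint32_t)sbox(w >> 24) << 24;
}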
695 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
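ce_aes_invert() performs Inverse MixColumns on a round key, the standard step for turning an encryption key schedule into one usable by the equivalent inverse cipher, presumably via the aesimc instruction. A C reference for Inverse MixColumns on one column-major block, reusing gmul() from the sketch above; layout and names are illustrative:

#include <stdint.h>

/* gmul() as defined in the previous sketch */

static void inv_mix_columns(uint8_t blk[16])
{
        for (int c = 0; c < 4; c++) {
                uint8_t *p = blk + 4 * c;       /* one AES state column */
                uint8_t a0 = p[0], a1 = p[1], a2 = p[2], a3 = p[3];

                p[0] = gmul(a0, 14) ^ gmul(a1, 11) ^ gmul(a2, 13) ^ gmul(a3, 9);
                p[1] = gmul(a0, 9)  ^ gmul(a1, 14) ^ gmul(a2, 11) ^ gmul(a3, 13);
                p[2] = gmul(a0, 13) ^ gmul(a1, 9)  ^ gmul(a2, 14) ^ gmul(a3, 11);
                p[3] = gmul(a0, 11) ^ gmul(a1, 13) ^ gmul(a2, 9)  ^ gmul(a3, 14);
        }
}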