Lines Matching +full:ip +full:- +full:block
1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
12 .arch armv8-a
13 .fpu crypto-neon-fp-armv8
102 vld1.32 {q10-q11}, [ip]!
104 vld1.32 {q12-q13}, [ip]!
106 vld1.32 {q10-q11}, [ip]!
108 vld1.32 {q12-q13}, [ip]!
110 blo 0f @ AES-128: 10 rounds
111 vld1.32 {q10-q11}, [ip]!
113 beq 1f @ AES-192: 12 rounds
114 vld1.32 {q12-q13}, [ip]
124 * Internal, non-AAPCS compliant functions that implement the core AES
125 * transforms. These should preserve all registers except q0 - q3 and ip
127 * q0 : first in/output block
128 * q1 : second in/output block (_4x version only)
129 * q2 : third in/output block (_4x version only)
130 * q3 : fourth in/output block (_4x version only)
139 add ip, r2, #32 @ 3rd round key
146 add ip, r2, #32 @ 3rd round key
152 add ip, r2, #32 @ 3rd round key
158 add ip, r2, #32 @ 3rd round key
163 add ip, \rk, \rounds, lsl #4
164 vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
165 vld1.32 {q14}, [ip] @ load last round key
181 vld1.8 {q0-q1}, [r1]!
182 vld1.8 {q2-q3}, [r1]!
184 vst1.8 {q0-q1}, [r0]!
185 vst1.8 {q2-q3}, [r0]!
207 vld1.8 {q0-q1}, [r1]!
208 vld1.8 {q2-q3}, [r1]!
210 vst1.8 {q0-q1}, [r0]!
211 vst1.8 {q2-q3}, [r0]!
233 push {r4-r6, lr}
238 vld1.8 {q1}, [r1]! @ get next pt block
245 pop {r4-r6, pc}
249 push {r4-r6, lr}
256 vld1.8 {q0-q1}, [r1]!
257 vld1.8 {q2-q3}, [r1]!
268 vst1.8 {q0-q1}, [r0]!
269 vst1.8 {q2-q3}, [r0]!
276 vld1.8 {q0}, [r1]! @ get next ct block
285 pop {r4-r6, pc}
297 push {r4-r6, lr}
300 movw ip, :lower16:.Lcts_permute_table
301 movt ip, :upper16:.Lcts_permute_table
303 add lr, ip, #32
304 add ip, ip, r4
306 vld1.8 {q5}, [ip]
309 add ip, r1, r4
311 vld1.8 {q3}, [ip]
319 vtbl.8 d4, {d0-d1}, d10
320 vtbl.8 d5, {d0-d1}, d11
321 vtbl.8 d2, {d6-d7}, d12
322 vtbl.8 d3, {d6-d7}, d13
331 pop {r4-r6, pc}
335 push {r4-r6, lr}
338 movw ip, :lower16:.Lcts_permute_table
339 movt ip, :upper16:.Lcts_permute_table
341 add lr, ip, #32
342 add ip, ip, r4
344 vld1.8 {q5}, [ip]
347 add ip, r1, r4
349 vld1.8 {q1}, [ip]
356 vtbl.8 d4, {d0-d1}, d10
357 vtbl.8 d5, {d0-d1}, d11
358 vtbx.8 d0, {d2-d3}, d12
359 vtbx.8 d1, {d2-d3}, d13
369 pop {r4-r6, pc}
378 push {r4-r6, lr}
392 * a silicon erratum that exists in Cortex-A57 (#1742098) and
393 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
395 * register of which a single 32-bit lane has been updated the last
397 * q0-q3 below are not manipulated individually, and the different
400 add ip, r6, #1
402 rev ip, ip
404 vmov s31, ip @ set lane 3 of q1 via q7
405 add ip, r6, #3
409 rev ip, ip
411 vmov s31, ip @ set lane 3 of q3 via q7
415 vld1.8 {q4-q5}, [r1]!
423 rev ip, r6
424 vst1.8 {q0-q1}, [r0]!
425 vst1.8 {q2-q3}, [r0]!
426 vmov s31, ip
436 rev ip, r6
437 vmov s31, ip
442 bmi .Lctrtailblock @ blocks < 0 means tail block
450 pop {r4-r6, pc}
458 vmov ip, \sreg @ load next word of ctr
459 rev ip, ip @ ... to handle the carry
460 adds ip, ip, #1
461 rev ip, ip
462 vmov \sreg, ip
491 teq r6, #1 @ start of a block?
495 @ be done at the start of a block.
498 add ip, r6, #32 @ 3rd round key of key 2
503 push {r4-r6, lr}
509 teq r6, #0 @ start of a block?
517 vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
518 vld1.8 {q2-q3}, [r1]!
531 vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
532 vst1.8 {q2-q3}, [r0]!
559 pop {r4-r6, pc}
565 movw ip, :lower16:.Lcts_permute_table
566 movt ip, :upper16:.Lcts_permute_table
569 add r4, r4, #16 @ # bytes in final block
570 add lr, ip, #32
571 add ip, ip, r4
573 add r4, r0, r4 @ output address of final block
575 vld1.8 {q1}, [r1] @ load final partial block
576 vld1.8 {q2}, [ip]
579 vtbl.8 d4, {d0-d1}, d4
580 vtbl.8 d5, {d0-d1}, d5
581 vtbx.8 d0, {d2-d3}, d6
582 vtbx.8 d1, {d2-d3}, d7
591 push {r4-r6, lr}
601 teq r6, #0 @ start of a block?
609 vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
610 vld1.8 {q2-q3}, [r1]!
623 vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
624 vst1.8 {q2-q3}, [r0]!
648 pop {r4-r6, pc}
651 movw ip, :lower16:.Lcts_permute_table
652 movt ip, :upper16:.Lcts_permute_table
655 add r4, r4, #16 @ # bytes in final block
656 add lr, ip, #32
657 add ip, ip, r4
659 add r4, r0, r4 @ output address of final block
663 vld1.8 {q1}, [r1] @ load final partial block
664 vld1.8 {q2}, [ip]
671 vtbl.8 d4, {d0-d1}, d4
672 vtbl.8 d5, {d0-d1}, d5
673 vtbx.8 d0, {d2-d3}, d6
674 vtbx.8 d1, {d2-d3}, d7
682 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
695 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns