Lines Matching +full:rx +full:- +full:input

1 /* Do not modify. This file is auto-generated from chacha-armv4.pl. */
21 .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
29 .word OPENSSL_armcap_P-.LChaCha20_ctr32
32 .word -1
41 stmdb sp!,{r0,r1,r2,r4-r11,lr}
56 ldr r4,[r14,#-32]
68 sub sp,sp,#4*(16) @ off-load area
75 str r10,[sp,#4*(16+10)] @ off-load "rx"
76 str r11,[sp,#4*(16+11)] @ off-load "rx"
87 ldr r12,[sp,#4*(12)] @ modulo-scheduled load
209 str r8, [sp,#4*(16+8)] @ modulo-scheduled store
215 @ at this point we have first half of 512-bit result in
216 @ rx and second half at sp+4*(16+8)
233 tst r10,#3 @ are input and output aligned?
247 ldrhs r8,[r12],#16 @ load input
248 ldrhs r9,[r12,#-12]
255 ldrhs r10,[r12,#-8]
256 ldrhs r11,[r12,#-4]
266 eorhs r0,r0,r8 @ xor with input
276 str r1,[r14,#-12]
277 str r2,[r14,#-8]
278 str r3,[r14,#-4]
285 ldrhs r8,[r12],#16 @ load input
286 ldrhs r9,[r12,#-12]
292 ldrhs r10,[r12,#-8]
293 ldrhs r11,[r12,#-4]
312 str r5,[r14,#-12]
314 str r6,[r14,#-8]
316 str r7,[r14,#-4]
325 ldrhs r8,[r12],#16 @ load input
326 ldrhs r9,[r12,#-12]
330 strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it
331 strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it
337 ldrhs r10,[r12,#-8]
338 ldrhs r11,[r12,#-4]
357 str r1,[r14,#-12]
359 str r2,[r14,#-8]
360 str r3,[r14,#-4]
372 ldrhs r8,[r12],#16 @ load input
373 ldrhs r9,[r12,#-12]
379 ldrhs r10,[r12,#-8]
380 ldrhs r11,[r12,#-4]
395 ldrne r8,[sp,#4*(32+2)] @ re-load len
402 str r5,[r14,#-12]
406 subhs r11,r8,#64 @ len-=64
407 str r6,[r14,#-8]
408 str r7,[r14,#-4]
416 .Lunaligned:@ unaligned endian-neutral path
429 ldrhsb r8,[r12],#16 @ ... load input
431 ldrhsb r9,[r12,#-12]
438 ldrhsb r10,[r12,#-8]
440 ldrhsb r11,[r12,#-4]
442 eor r0,r8,r0 @ xor with input (or zero)
447 ldrhsb r8,[r12,#-15] @ load more input
448 ldrhsb r9,[r12,#-11]
455 ldrhsb r10,[r12,#-7]
456 ldrhsb r11,[r12,#-3]
457 strb r1,[r14,#-12]
459 strb r2,[r14,#-8]
464 ldrhsb r8,[r12,#-14] @ load more input
465 ldrhsb r9,[r12,#-10]
466 strb r3,[r14,#-4]
468 strb r0,[r14,#-15]
473 ldrhsb r10,[r12,#-6]
474 ldrhsb r11,[r12,#-2]
475 strb r1,[r14,#-11]
477 strb r2,[r14,#-7]
482 ldrhsb r8,[r12,#-13] @ load more input
483 ldrhsb r9,[r12,#-9]
484 strb r3,[r14,#-3]
486 strb r0,[r14,#-14]
491 ldrhsb r10,[r12,#-5]
492 ldrhsb r11,[r12,#-1]
493 strb r1,[r14,#-10]
494 strb r2,[r14,#-6]
496 strb r3,[r14,#-2]
498 strb r0,[r14,#-13]
500 strb r1,[r14,#-9]
502 strb r2,[r14,#-5]
503 strb r3,[r14,#-1]
514 ldrhsb r8,[r12],#16 @ ... load input
516 ldrhsb r9,[r12,#-12]
523 ldrhsb r10,[r12,#-8]
525 ldrhsb r11,[r12,#-4]
527 eor r4,r8,r4 @ xor with input (or zero)
532 ldrhsb r8,[r12,#-15] @ load more input
533 ldrhsb r9,[r12,#-11]
540 ldrhsb r10,[r12,#-7]
541 ldrhsb r11,[r12,#-3]
542 strb r5,[r14,#-12]
544 strb r6,[r14,#-8]
549 ldrhsb r8,[r12,#-14] @ load more input
550 ldrhsb r9,[r12,#-10]
551 strb r7,[r14,#-4]
553 strb r4,[r14,#-15]
558 ldrhsb r10,[r12,#-6]
559 ldrhsb r11,[r12,#-2]
560 strb r5,[r14,#-11]
562 strb r6,[r14,#-7]
567 ldrhsb r8,[r12,#-13] @ load more input
568 ldrhsb r9,[r12,#-9]
569 strb r7,[r14,#-3]
571 strb r4,[r14,#-14]
576 ldrhsb r10,[r12,#-5]
577 ldrhsb r11,[r12,#-1]
578 strb r5,[r14,#-10]
579 strb r6,[r14,#-6]
581 strb r7,[r14,#-2]
583 strb r4,[r14,#-13]
585 strb r5,[r14,#-9]
587 strb r6,[r14,#-5]
588 strb r7,[r14,#-1]
595 strhi r10,[sp,#4*(16+10)] @ copy "rx"
596 strhi r11,[sp,#4*(16+11)] @ copy "rx"
604 ldrhsb r8,[r12],#16 @ ... load input
606 ldrhsb r9,[r12,#-12]
613 ldrhsb r10,[r12,#-8]
615 ldrhsb r11,[r12,#-4]
617 eor r0,r8,r0 @ xor with input (or zero)
622 ldrhsb r8,[r12,#-15] @ load more input
623 ldrhsb r9,[r12,#-11]
630 ldrhsb r10,[r12,#-7]
631 ldrhsb r11,[r12,#-3]
632 strb r1,[r14,#-12]
634 strb r2,[r14,#-8]
639 ldrhsb r8,[r12,#-14] @ load more input
640 ldrhsb r9,[r12,#-10]
641 strb r3,[r14,#-4]
643 strb r0,[r14,#-15]
648 ldrhsb r10,[r12,#-6]
649 ldrhsb r11,[r12,#-2]
650 strb r1,[r14,#-11]
652 strb r2,[r14,#-7]
657 ldrhsb r8,[r12,#-13] @ load more input
658 ldrhsb r9,[r12,#-9]
659 strb r3,[r14,#-3]
661 strb r0,[r14,#-14]
666 ldrhsb r10,[r12,#-5]
667 ldrhsb r11,[r12,#-1]
668 strb r1,[r14,#-10]
669 strb r2,[r14,#-6]
671 strb r3,[r14,#-2]
673 strb r0,[r14,#-13]
675 strb r1,[r14,#-9]
677 strb r2,[r14,#-5]
678 strb r3,[r14,#-1]
693 ldrhsb r8,[r12],#16 @ ... load input
695 ldrhsb r9,[r12,#-12]
702 ldrhsb r10,[r12,#-8]
704 ldrhsb r11,[r12,#-4]
706 eor r4,r8,r4 @ xor with input (or zero)
711 ldrhsb r8,[r12,#-15] @ load more input
712 ldrhsb r9,[r12,#-11]
719 ldrhsb r10,[r12,#-7]
720 ldrhsb r11,[r12,#-3]
721 strb r5,[r14,#-12]
723 strb r6,[r14,#-8]
728 ldrhsb r8,[r12,#-14] @ load more input
729 ldrhsb r9,[r12,#-10]
730 strb r7,[r14,#-4]
732 strb r4,[r14,#-15]
737 ldrhsb r10,[r12,#-6]
738 ldrhsb r11,[r12,#-2]
739 strb r5,[r14,#-11]
741 strb r6,[r14,#-7]
746 ldrhsb r8,[r12,#-13] @ load more input
747 ldrhsb r9,[r12,#-9]
748 strb r7,[r14,#-3]
750 strb r4,[r14,#-14]
755 ldrhsb r10,[r12,#-5]
756 ldrhsb r11,[r12,#-1]
757 strb r5,[r14,#-10]
758 strb r6,[r14,#-6]
760 strb r7,[r14,#-2]
762 strb r4,[r14,#-13]
764 strb r5,[r14,#-9]
766 strb r6,[r14,#-5]
767 strb r7,[r14,#-1]
771 ldrne r8,[sp,#4*(32+2)] @ re-load len
775 subhs r11,r8,#64 @ len-=64
788 ldrb r11,[r12],#1 @ read input
798 .size ChaCha20_ctr32,.-ChaCha20_ctr32
800 .arch armv7-a
807 stmdb sp!,{r0,r1,r2,r4-r11,lr}
825 str r10,[sp,#4*(16+10)] @ off-load "rx"
826 str r11,[sp,#4*(16+11)] @ off-load "rx"
842 bls .Lbreak_neon @ switch to integer-only
853 ldr r12,[sp,#4*(12)] @ modulo-scheduled load
1091 str r8, [sp,#4*(16+8)] @ modulo-scheduled store
1097 @ at this point we have first half of 512-bit result in
1098 @ rx and second half at sp+4*(16+8)
1126 vld1.8 {q12,q13},[r12]! @ load input
1129 veor q0,q0,q12 @ xor with input
1161 ldr r8,[r12],#16 @ load input
1164 ldr r9,[r12,#-12]
1167 ldr r10,[r12,#-8]
1169 ldr r11,[r12,#-4]
1176 eor r0,r0,r8 @ xor with input
1181 str r1,[r14,#-12]
1184 str r2,[r14,#-8]
1185 str r3,[r14,#-4]
1188 ldr r8,[r12],#16 @ load input
1190 ldr r9,[r12,#-12]
1192 ldr r10,[r12,#-8]
1194 ldr r11,[r12,#-4]
1206 str r5,[r14,#-12]
1209 str r6,[r14,#-8]
1211 str r7,[r14,#-4]
1216 ldr r8,[r12],#16 @ load input
1218 ldr r9,[r12,#-12]
1222 strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it
1224 ldr r10,[r12,#-8]
1228 strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it
1230 ldr r11,[r12,#-4]
1242 str r1,[r14,#-12]
1245 str r2,[r14,#-8]
1246 str r3,[r14,#-4]
1252 ldr r8,[r12],#16 @ load input
1255 ldr r9,[r12,#-12]
1257 ldr r10,[r12,#-8]
1258 ldr r11,[r12,#-4]
1269 ldrhi r8,[sp,#4*(32+2)] @ re-load len
1274 str r5,[r14,#-12]
1275 sub r11,r8,#64*4 @ len-=64*4
1276 str r6,[r14,#-8]
1277 str r7,[r14,#-4]
1284 @ harmonize NEON and integer-only stack frames: load data
1285 @ from NEON frame, but save to integer-only one; distance
1286 @ between the two is 4*(32+4+16-32)=4*(20).
1296 str r12,[sp,#4*(20+16+10)] @ copy "rx"
1297 str r14,[sp,#4*(20+16+11)] @ copy "rx"
1300 ldr r12,[sp,#4*(12)] @ modulo-scheduled load
1309 b .Loop @ go integer-only
1343 sub r11,r11,#64*1 @ len-=64*1
1372 sub r11,r11,#64*2 @ len-=64*2
1448 ldr r11,[sp,#4*(32+2)] @ re-load len
1461 sub r11,r11,#64*3 @ len-=64*3
1465 ldrb r9,[r12],#1 @ read input
1476 .size ChaCha20_neon,.-ChaCha20_neon