Lines Matching +full:24 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - Handles multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
26 # to 9 vectors for multiplications.
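The sum-of-products form the header refers to is easy to check numerically: four Horner steps h = (h + m_i) * r mod p equal one combined evaluation with precomputed powers of r, which is what lets four blocks be multiplied in parallel. A minimal self-contained C sketch with a toy modulus standing in for 2^130 - 5 (all names and constants here are illustrative, not taken from the .S file):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t p = 1000003, r = 98765;   /* toy stand-ins */
        uint64_t m[4] = {101, 202, 303, 404}, h = 55;

        /* serial Horner: h = (h + m_i) * r mod p, four times */
        uint64_t horner = h;
        for (int i = 0; i < 4; i++)
            horner = (horner + m[i]) % p * r % p;

        /* sum of products with precomputed r^2, r^3, r^4 */
        uint64_t r2 = r * r % p, r3 = r2 * r % p, r4 = r3 * r % p;
        uint64_t sop = ((h + m[0]) % p * r4 + m[1] * r3 + m[2] * r2 + m[3] * r) % p;

        printf("%llu == %llu\n", (unsigned long long)horner, (unsigned long long)sop);
        return 0;
    }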
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
107 SAVE_GPR 24, 192, 1
116 addi 9, 1, 256
117 SAVE_VRS 20, 0, 9
118 SAVE_VRS 21, 16, 9
119 SAVE_VRS 22, 32, 9
120 SAVE_VRS 23, 48, 9
121 SAVE_VRS 24, 64, 9
122 SAVE_VRS 25, 80, 9
123 SAVE_VRS 26, 96, 9
124 SAVE_VRS 27, 112, 9
125 SAVE_VRS 28, 128, 9
126 SAVE_VRS 29, 144, 9
127 SAVE_VRS 30, 160, 9
128 SAVE_VRS 31, 176, 9
130 SAVE_VSX 14, 192, 9
131 SAVE_VSX 15, 208, 9
132 SAVE_VSX 16, 224, 9
133 SAVE_VSX 17, 240, 9
134 SAVE_VSX 18, 256, 9
135 SAVE_VSX 19, 272, 9
136 SAVE_VSX 20, 288, 9
137 SAVE_VSX 21, 304, 9
138 SAVE_VSX 22, 320, 9
139 SAVE_VSX 23, 336, 9
140 SAVE_VSX 24, 352, 9
141 SAVE_VSX 25, 368, 9
142 SAVE_VSX 26, 384, 9
143 SAVE_VSX 27, 400, 9
144 SAVE_VSX 28, 416, 9
145 SAVE_VSX 29, 432, 9
146 SAVE_VSX 30, 448, 9
147 SAVE_VSX 31, 464, 9
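The prologue above spills the non-volatile GPRs, VRs, and VSX registers into the 752-byte frame created by "stdu 1,-752(1)", with r9 = sp + 256 as the base for the vector saves. A hypothetical C view of that layout, assuming the SAVE_* macros store at base + offset; the struct and its field names are invented, but the offsets follow from the calls shown (r24 at 192(r1), v20..v31 at r1+256+0..176, vs14..vs31 at r1+256+192..464):

    /* hypothetical map of the 752-byte frame; names are illustrative */
    struct p10_poly1305_frame {
        unsigned char abi_and_locals[112];  /* back chain, LR save area, ... */
        unsigned long long gpr[18];         /* r14..r31  at 112..255 */
        unsigned char vrs[12][16];          /* v20..v31  at 256..447 */
        unsigned char vsx[18][16];          /* vs14..vs31 at 448..735 */
        unsigned char tail[16];             /* rounds the frame to 752 */
    };
    _Static_assert(sizeof(struct p10_poly1305_frame) == 752, "frame size");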
151 addi 9, 1, 256
152 RESTORE_VRS 20, 0, 9
153 RESTORE_VRS 21, 16, 9
154 RESTORE_VRS 22, 32, 9
155 RESTORE_VRS 23, 48, 9
156 RESTORE_VRS 24, 64, 9
157 RESTORE_VRS 25, 80, 9
158 RESTORE_VRS 26, 96, 9
159 RESTORE_VRS 27, 112, 9
160 RESTORE_VRS 28, 128, 9
161 RESTORE_VRS 29, 144, 9
162 RESTORE_VRS 30, 160, 9
163 RESTORE_VRS 31, 176, 9
165 RESTORE_VSX 14, 192, 9
166 RESTORE_VSX 15, 208, 9
167 RESTORE_VSX 16, 224, 9
168 RESTORE_VSX 17, 240, 9
169 RESTORE_VSX 18, 256, 9
170 RESTORE_VSX 19, 272, 9
171 RESTORE_VSX 20, 288, 9
172 RESTORE_VSX 21, 304, 9
173 RESTORE_VSX 22, 320, 9
174 RESTORE_VSX 23, 336, 9
175 RESTORE_VSX 24, 352, 9
176 RESTORE_VSX 25, 368, 9
177 RESTORE_VSX 26, 384, 9
178 RESTORE_VSX 27, 400, 9
179 RESTORE_VSX 28, 416, 9
180 RESTORE_VSX 29, 432, 9
181 RESTORE_VSX 30, 448, 9
182 RESTORE_VSX 31, 464, 9
194 RESTORE_GPR 24, 192, 1
268 vmuleuw 9, 4, 26
273 vaddudm 14, 14, 9
279 vmuleuw 9, 4, 27
284 vaddudm 15, 15, 9
290 vmuleuw 9, 4, 28
295 vaddudm 16, 16, 9
301 vmuleuw 9, 4, 29
306 vaddudm 17, 17, 9
312 vmuleuw 9, 4, 30
317 vaddudm 18, 18, 9
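These vmuleuw multiplies (paired, presumably, with vmulouw forms not matched by this search) and the vaddudm accumulations build the column sums of a 5x5 schoolbook multiply of 26-bit limbs. A scalar C model in the poly1305-donna-32 style; the mapping of v4/v26..v30 to particular h/r limbs and the s[k] = 5*r[k+1] precomputation are assumptions, not read out of the .S:

    #include <stdint.h>

    /* h = h * r mod 2^130-5, 26-bit limbs; s[k] = 5*r[k+1] folds the
     * partial products that cross 2^130 back down (2^130 == 5 mod p) */
    static void mul_26(uint32_t h[5], const uint32_t r[5], const uint32_t s[4])
    {
        uint64_t d0 = (uint64_t)h[0]*r[0] + (uint64_t)h[1]*s[3] + (uint64_t)h[2]*s[2]
                    + (uint64_t)h[3]*s[1] + (uint64_t)h[4]*s[0];
        uint64_t d1 = (uint64_t)h[0]*r[1] + (uint64_t)h[1]*r[0] + (uint64_t)h[2]*s[3]
                    + (uint64_t)h[3]*s[2] + (uint64_t)h[4]*s[1];
        uint64_t d2 = (uint64_t)h[0]*r[2] + (uint64_t)h[1]*r[1] + (uint64_t)h[2]*r[0]
                    + (uint64_t)h[3]*s[3] + (uint64_t)h[4]*s[2];
        uint64_t d3 = (uint64_t)h[0]*r[3] + (uint64_t)h[1]*r[2] + (uint64_t)h[2]*r[1]
                    + (uint64_t)h[3]*r[0] + (uint64_t)h[4]*s[3];
        uint64_t d4 = (uint64_t)h[0]*r[4] + (uint64_t)h[1]*r[3] + (uint64_t)h[2]*r[2]
                    + (uint64_t)h[3]*r[1] + (uint64_t)h[4]*r[0];
        uint64_t c;
        c = d0 >> 26; d1 += c; h[0] = d0 & 0x3ffffff;
        c = d1 >> 26; d2 += c; h[1] = d1 & 0x3ffffff;
        c = d2 >> 26; d3 += c; h[2] = d2 & 0x3ffffff;
        c = d3 >> 26; d4 += c; h[3] = d3 & 0x3ffffff;
        c = d4 >> 26;          h[4] = d4 & 0x3ffffff;
        c = h[0] + c * 5;      h[0] = (uint32_t)c & 0x3ffffff; /* 2^130 == 5 */
        h[1] += (uint32_t)(c >> 26);
    }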
375 vsld 9, 27, 13
379 vaddudm 0, 9, 27
391 vsld 9, 27, 13
395 vaddudm 0, 9, 27
411 vspltw 9, 26, 3
413 vmrgow 26, 10, 9
414 vspltw 9, 27, 3
416 vmrgow 27, 10, 9
417 vspltw 9, 28, 3
419 vmrgow 28, 10, 9
420 vspltw 9, 29, 3
422 vmrgow 29, 10, 9
423 vspltw 9, 30, 3
425 vmrgow 30, 10, 9
427 vsld 9, 27, 13
431 vaddudm 0, 9, 27
442 vspltisb 9, 2 # splat 2: shift count used to form 5*x
455 vsld 10, 12, 9 # v10 = v12 << 2
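The recurring shift-by-2 / add pairs (vsld then vaddudm, as at lines 375/379 above) compute 5*x as (x << 2) + x; the 5*r_k values are needed because any limb product that crosses 2^130 folds back multiplied by 5, since 2^130 == 5 (mod 2^130 - 5). A one-line C equivalent of the pattern (name illustrative):

    #include <stdint.h>

    /* (x << 2) + x == 5 * x, the vsld/vaddudm idiom in scalar form */
    static inline uint64_t times5(uint64_t x)
    {
        return (x << 2) + x;
    }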
486 lvx 25, 0, 10 # v25 - 26-bit limb mask
488 lvx 19, 15, 10 # v19 = 1 << 24
489 lxv 24, 48(10) # vs24
494 ld 9, 24(3)
496 and. 9, 9, 11 # clamp r0
500 extrdi 14, 9, 26, 38 # limb 0: bits 0..25
501 extrdi 15, 9, 26, 12 # limb 1: bits 26..51
502 extrdi 16, 9, 12, 0 # limb 2, low 12 bits
506 extrdi 17, 10, 26, 24 # limb 3: bits 78..103
508 extrdi 18, 10, 24, 0 # limb 4: bits 104..127
513 li 9, 5
514 mtvsrdd 36, 0, 9 # vs36 = (0, 5)
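The extrdi sequence carves the clamped 128-bit value into five 26-bit limbs (extrdi rt,rs,n,b extracts n bits starting at big-endian bit b and right-justifies them). A C sketch of the same split; the OR that merges the 14 high-word bits into limb 2 is not among the matched lines, so that part is reconstructed here:

    #include <stdint.h>

    /* lo/hi are the two 64-bit halves of the clamped value */
    static void split_26(uint64_t lo, uint64_t hi, uint32_t r[5])
    {
        r[0] = lo & 0x3ffffff;                     /* extrdi 14, 9, 26, 38 */
        r[1] = (lo >> 26) & 0x3ffffff;             /* extrdi 15, 9, 26, 12 */
        r[2] = (uint32_t)(lo >> 52)                /* extrdi 16, 9, 12, 0  */
             | (((uint32_t)hi & 0x3fff) << 12);    /* + low 14 bits of hi  */
        r[3] = (hi >> 14) & 0x3ffffff;             /* extrdi 17, 10, 26, 24 */
        r[4] = (uint32_t)(hi >> 40);               /* extrdi 18, 10, 24, 0 */
    }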
543 ld 9, 0(3)
546 sldi 19, 19, 24
548 extrdi 14, 9, 26, 38 # limb 0: bits 0..25
549 extrdi 15, 9, 26, 12 # limb 1: bits 26..51
550 extrdi 16, 9, 12, 0 # limb 2, low 12 bits
554 extrdi 17, 10, 26, 24 # limb 3: bits 78..103
556 extrdi 18, 10, 24, 0 # limb 4: bits 104..127
559 vor 8, 8, 9
563 xxlor 49, 24, 24 # vs49 = vs24
570 vand 9, 14, 25 # a0
584 vaddudm 20, 4, 9
588 vaddudm 24, 8, 13
597 vand 9, 14, 25 # a0
612 vmrgow 4, 9, 20
616 vmrgow 8, 13, 24
619 addi 5, 5, -64 # len -= 64
622 li 9, 64
623 divdu 31, 5, 9 # r31 = len / 64
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
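The comments above describe two interleaved accumulator lanes that each advance by r^2 per step and are combined at the end with r^2 and r. A toy-modulus C check of that identity over eight blocks (illustrative values, modulus standing in for 2^130 - 5):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t p = 1000003, r = 98765;
        uint64_t m[8] = {1, 2, 3, 4, 5, 6, 7, 8}, h = 9;
        uint64_t r2 = r * r % p;

        /* serial reference: h = (h + m_i) * r mod p */
        uint64_t serial = h;
        for (int i = 0; i < 8; i++)
            serial = (serial + m[i]) % p * r % p;

        /* two lanes with stride r^2: lane a takes m1,m3,m5,m7 (plus h),
         * lane b takes m2,m4,m6,m8 */
        uint64_t a = (h + m[0]) % p, b = m[1] % p;
        for (int i = 2; i < 8; i += 2) {
            a = (a * r2 % p + m[i])     % p;
            b = (b * r2 % p + m[i + 1]) % p;
        }
        uint64_t lanes = (a * r2 % p + b * r % p) % p; /* combine with r^2, r */

        printf("%llu == %llu\n", (unsigned long long)serial, (unsigned long long)lanes);
        return 0;
    }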
644 vspltisb 9, 2
657 vsld 10, 12, 9
674 xxlor 49, 24, 24 # vs49 = vs24
689 vand 9, 17, 25 # a0
709 vsrd 24, 23, 31 # >> 26, a4
719 vaddudm 8, 8, 24
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
752 vaddudm 4, 14, 9
767 vspltisb 9, 2
780 vsld 10, 12, 9
819 srdi 16, 16, 24
851 ld 9, 24(3)
853 and. 9, 9, 11 # clamp mask r0
857 add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2), i.e. (r1 >> 2) * 5
861 mtvsrdd 32+0, 9, 19 # r0, s1
862 mtvsrdd 32+1, 10, 9 # r1, r0
864 mtvsrdd 32+3, 9, 25 # r0
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
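The vmsumudm comments give the 64-bit-limb product terms directly: d0 = h0*r0 + h1*s1, d1 = h0*r1 + h1*r0 (plus an h2*s1 term), d2 = h2*r0. A scalar C model of that multiply in the usual 64-bit-limb style; s1 = r1 + (r1 >> 2) works because r1's low two bits are clamped to zero, so s1 == (r1 >> 2) * 5. The register allocation around it is assumed:

    #include <stdint.h>
    typedef unsigned __int128 u128;

    /* h = (h0, h1, h2) * (r0, r1) mod 2^130-5, partial reduction */
    static void mul_64(uint64_t h[3], uint64_t r0, uint64_t r1)
    {
        uint64_t s1 = r1 + (r1 >> 2);                 /* == (r1 >> 2) * 5 */
        u128 d0 = (u128)h[0]*r0 + (u128)h[1]*s1;
        u128 d1 = (u128)h[0]*r1 + (u128)h[1]*r0 + (u128)h[2]*s1;
        uint64_t d2 = h[2] * r0;                      /* h2 is only a few bits */

        h[0] = (uint64_t)d0;
        d1  += (uint64_t)(d0 >> 64);
        h[1] = (uint64_t)d1;
        d2  += (uint64_t)(d1 >> 64);

        /* fold bits >= 130: (d2 >> 2) * 2^130 == (d2 >> 2) * 5 (mod p) */
        uint64_t c = (d2 >> 2) + (d2 & ~3ULL);        /* == (d2 >> 2) * 5 */
        h[2] = d2 & 3;
        h[0] += c; c = (h[0] < c);
        h[1] += c; c = (h[1] < c);
        h[2] += c;
    }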
923 # - no highbit if final leftover block (highbit = 0)
931 stdu 1,-400(1)
943 SAVE_GPR 24, 192, 1
970 mr 24, 6 # highbit
973 vxor 9, 9, 9
981 adde 29, 29, 24
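The addc/adde chain this line belongs to adds one 16-byte block into the 130-bit accumulator, with the pad bit 2^128 supplied through the highbit argument (zero only for the final partial block, per the comment above). A minimal C sketch of that carry chain, with invented names:

    #include <stdint.h>

    /* h += m + (highbit << 128), three 64-bit limbs with manual carries */
    static void add_block(uint64_t h[3], uint64_t m0, uint64_t m1, uint64_t highbit)
    {
        h[0] += m0;
        uint64_t c = (h[0] < m0);
        h[1] += m1;
        uint64_t c2 = (h[1] < m1);
        h[1] += c;
        c2 += (h[1] < c);
        h[2] += c2 + highbit;
    }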
1009 RESTORE_GPR 24, 192, 1
1039 # h + 5 + (-p)
1046 srdi 9, 8, 2 # h + 5 >= 2^130, i.e. h >= p?
1047 cmpdi 9, 0
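This is the standard final freeze: since p = 2^130 - 5, h - p = h + 5 - 2^130, so the code adds 5 and checks whether anything carried past bit 129 (the srdi/cmpdi above). A C sketch of the same selection, assuming h is held as three 64-bit limbs:

    #include <stdint.h>

    /* reduce h to the canonical value mod p = 2^130 - 5 */
    static void freeze(uint64_t h[3])
    {
        uint64_t g0 = h[0] + 5, c = (g0 < 5);
        uint64_t g1 = h[1] + c; c = (g1 < c);
        uint64_t g2 = h[2] + c;
        if (g2 >> 2) {              /* h + 5 overflowed 2^130: h >= p */
            h[0] = g0;
            h[1] = g1;
            h[2] = g2 & 3;          /* drop the 2^130 bit */
        }                           /* else keep h as-is */
    }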