Lines Matching +full:2 +full:- +full:9
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - Handles multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
26 # to 9 vectors for multiplications.
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
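The "sum of products" these comments refer to is the standard 4-way regrouping of the serial Poly1305 update h = (h + m_i) * r (mod p). Expanding four steps of that update gives

    h' = ((((h + m1)*r + m2)*r + m3)*r + m4)*r
       = (h + m1)*r^4 + m2*r^3 + m3*r^2 + m4*r      (mod p)

so one loop iteration can fold in four 16-byte blocks at once using the precomputed powers r^4, r^3, r^2, r, plus their *5 companions (s3, s2, ...) for the 2^130 = 5 (mod p) wrap-around.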
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
116 addi 9, 1, 256
117 SAVE_VRS 20, 0, 9
118 SAVE_VRS 21, 16, 9
119 SAVE_VRS 22, 32, 9
120 SAVE_VRS 23, 48, 9
121 SAVE_VRS 24, 64, 9
122 SAVE_VRS 25, 80, 9
123 SAVE_VRS 26, 96, 9
124 SAVE_VRS 27, 112, 9
125 SAVE_VRS 28, 128, 9
126 SAVE_VRS 29, 144, 9
127 SAVE_VRS 30, 160, 9
128 SAVE_VRS 31, 176, 9
130 SAVE_VSX 14, 192, 9
131 SAVE_VSX 15, 208, 9
132 SAVE_VSX 16, 224, 9
133 SAVE_VSX 17, 240, 9
134 SAVE_VSX 18, 256, 9
135 SAVE_VSX 19, 272, 9
136 SAVE_VSX 20, 288, 9
137 SAVE_VSX 21, 304, 9
138 SAVE_VSX 22, 320, 9
139 SAVE_VSX 23, 336, 9
140 SAVE_VSX 24, 352, 9
141 SAVE_VSX 25, 368, 9
142 SAVE_VSX 26, 384, 9
143 SAVE_VSX 27, 400, 9
144 SAVE_VSX 28, 416, 9
145 SAVE_VSX 29, 432, 9
146 SAVE_VSX 30, 448, 9
147 SAVE_VSX 31, 464, 9
151 addi 9, 1, 256
152 RESTORE_VRS 20, 0, 9
153 RESTORE_VRS 21, 16, 9
154 RESTORE_VRS 22, 32, 9
155 RESTORE_VRS 23, 48, 9
156 RESTORE_VRS 24, 64, 9
157 RESTORE_VRS 25, 80, 9
158 RESTORE_VRS 26, 96, 9
159 RESTORE_VRS 27, 112, 9
160 RESTORE_VRS 28, 128, 9
161 RESTORE_VRS 29, 144, 9
162 RESTORE_VRS 30, 160, 9
163 RESTORE_VRS 31, 176, 9
165 RESTORE_VSX 14, 192, 9
166 RESTORE_VSX 15, 208, 9
167 RESTORE_VSX 16, 224, 9
168 RESTORE_VSX 17, 240, 9
169 RESTORE_VSX 18, 256, 9
170 RESTORE_VSX 19, 272, 9
171 RESTORE_VSX 20, 288, 9
172 RESTORE_VSX 21, 304, 9
173 RESTORE_VSX 22, 320, 9
174 RESTORE_VSX 23, 336, 9
175 RESTORE_VSX 24, 352, 9
176 RESTORE_VSX 25, 368, 9
177 RESTORE_VSX 26, 384, 9
178 RESTORE_VSX 27, 400, 9
179 RESTORE_VSX 28, 416, 9
180 RESTORE_VSX 29, 432, 9
181 RESTORE_VSX 30, 448, 9
182 RESTORE_VSX 31, 464, 9
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
215 # [r^2, r^3, r^1, r^4]
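A plain-C sketch (not the vector code itself) of the 26-bit-limb products the p[2] comment above describes: every limb of a and r is below 2^26, and any partial product that would land at weight 2^130 or higher is folded back in multiplied by 5, since 2^130 = 5 (mod p). Function and variable names here are illustrative, not taken from the kernel source.

    typedef unsigned __int128 u128;

    /* schoolbook 5x5 limb product with the *5 wrap-around fold */
    static void poly1305_mul_limbs_sketch(const unsigned long long a[5],
                                          const unsigned long long r[5],
                                          u128 p[5])
    {
            p[0] = (u128)a[0]*r[0]   + (u128)a[1]*r[4]*5 + (u128)a[2]*r[3]*5
                 + (u128)a[3]*r[2]*5 + (u128)a[4]*r[1]*5;
            p[1] = (u128)a[0]*r[1]   + (u128)a[1]*r[0]   + (u128)a[2]*r[4]*5
                 + (u128)a[3]*r[3]*5 + (u128)a[4]*r[2]*5;
            p[2] = (u128)a[0]*r[2]   + (u128)a[1]*r[1]   + (u128)a[2]*r[0]
                 + (u128)a[3]*r[4]*5 + (u128)a[4]*r[3]*5;
            p[3] = (u128)a[0]*r[3]   + (u128)a[1]*r[2]   + (u128)a[2]*r[1]
                 + (u128)a[3]*r[0]   + (u128)a[4]*r[4]*5;
            p[4] = (u128)a[0]*r[4]   + (u128)a[1]*r[3]   + (u128)a[2]*r[2]
                 + (u128)a[3]*r[1]   + (u128)a[4]*r[0];
    }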
222 vmulouw 11, 6, 2
234 vmulouw 12, 7, 2
244 vmulouw 13, 8, 2
268 vmuleuw 9, 4, 26
270 vmuleuw 11, 6, 2
273 vaddudm 14, 14, 9
279 vmuleuw 9, 4, 27
282 vmuleuw 12, 7, 2
284 vaddudm 15, 15, 9
290 vmuleuw 9, 4, 28
294 vmuleuw 13, 8, 2
295 vaddudm 16, 16, 9
301 vmuleuw 9, 4, 29
306 vaddudm 17, 17, 9
312 vmuleuw 9, 4, 30
317 vaddudm 18, 18, 9
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
356 # [r, r^3, r^2, r^4]
357 # compute r^2
363 bl do_mul # r^2 r^1
374 vspltisb 13, 2
375 vsld 9, 27, 13
379 vaddudm 0, 9, 27
381 vaddudm 2, 11, 29
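The vspltisb/vsld/vaddudm pattern above appears to build the *5 companion values without a multiply: shift each r limb left by 2 (times 4) and add the original (times 5). In C terms (illustrative name only):

    /* s_i = r_i * 5, computed as (r_i << 2) + r_i */
    static inline unsigned long long times_five(unsigned long long x)
    {
            return (x << 2) + x;
    }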
390 vspltisb 13, 2
391 vsld 9, 27, 13
395 vaddudm 0, 9, 27
397 vaddudm 2, 11, 29
400 # r^2 r^4
403 xxlor 2, 60, 60
411 vspltw 9, 26, 3
412 vspltw 10, 26, 2
413 vmrgow 26, 10, 9
414 vspltw 9, 27, 3
415 vspltw 10, 27, 2
416 vmrgow 27, 10, 9
417 vspltw 9, 28, 3
418 vspltw 10, 28, 2
419 vmrgow 28, 10, 9
420 vspltw 9, 29, 3
421 vspltw 10, 29, 2
422 vmrgow 29, 10, 9
423 vspltw 9, 30, 3
424 vspltw 10, 30, 2
425 vmrgow 30, 10, 9
427 vsld 9, 27, 13
431 vaddudm 0, 9, 27
433 vaddudm 2, 11, 29
442 vspltisb 9, 2
455 vsld 10, 12, 9
476 addis 10, 2, rmask@toc@ha
484 addis 10, 2, cnum@toc@ha
486 lvx 25, 0, 10 # v25 - mask
494 ld 9, 24(3)
496 and. 9, 9, 11
500 extrdi 14, 9, 26, 38
501 extrdi 15, 9, 26, 12
502 extrdi 16, 9, 12, 0
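The extrdi triple above splits one 64-bit doubleword of the clamped key into 26-bit limbs: two full limbs plus the low 12 bits of the next. An equivalent C sketch with illustrative names:

    static void split_doubleword_sketch(unsigned long long t,
                                        unsigned long long limb[3])
    {
            limb[0] = t & 0x3ffffff;          /* bits  0..25           */
            limb[1] = (t >> 26) & 0x3ffffff;  /* bits 26..51           */
            limb[2] = t >> 52;                /* bits 52..63 (12 bits) */
    }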
513 li 9, 5
514 mtvsrdd 36, 0, 9
517 vmulouw 2, 29, 4 # v2 = rr2
543 ld 9, 0(3)
548 extrdi 14, 9, 26, 38
549 extrdi 15, 9, 26, 12
550 extrdi 16, 9, 12, 0
559 vor 8, 8, 9
570 vand 9, 14, 25 # a0
584 vaddudm 20, 4, 9
597 vand 9, 14, 25 # a0
612 vmrgow 4, 9, 20
619 addi 5, 5, -64 # len -= 64
622 li 9, 64
623 divdu 31, 5, 9
632 # h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
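The two-lane interleaving in these comments rests on the same regrouping identity noted near the top of the file. A small standalone C check of that single-lane identity with a toy modulus (none of these names or values come from the kernel source; the real modulus is p = 2^130 - 5):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define P 1000003ULL    /* toy prime standing in for 2^130 - 5 */

    /* one serial Poly1305-style step: h = (h + m) * r mod P */
    static uint64_t step(uint64_t h, uint64_t m, uint64_t r)
    {
            return (h + m) % P * r % P;
    }

    int main(void)
    {
            uint64_t h = 12345, r = 6789;
            uint64_t m[4] = { 111, 222, 333, 444 };
            uint64_t r2 = r * r % P, r3 = r2 * r % P, r4 = r3 * r % P;

            /* serial: fold in the four blocks one at a time */
            uint64_t serial = h;
            for (int i = 0; i < 4; i++)
                    serial = step(serial, m[i], r);

            /* grouped: (h + m1) r^4 + m2 r^3 + m3 r^2 + m4 r */
            uint64_t grouped = ((h + m[0]) % P * r4 +
                                m[1] * r3 + m[2] * r2 + m[3] * r) % P;

            assert(serial == grouped);
            printf("serial = grouped = %llu\n", (unsigned long long)serial);
            return 0;
    }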
644 vspltisb 9, 2
657 vsld 10, 12, 9
689 vand 9, 17, 25 # a0
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
737 xxlor 60, 2, 2
752 vaddudm 4, 14, 9
767 vspltisb 9, 2
780 vsld 10, 12, 9
813 mfvsrld 16, 40 # save last 2 bytes
844 addis 10, 2, rmask@toc@ha
851 ld 9, 24(3)
853 and. 9, 9, 11 # clamp mask r0
856 srdi 21, 10, 2
857 add 19, 21, 10 # s1: r19 = (r1 >> 2) * 5
861 mtvsrdd 32+0, 9, 19 # r0, s1
862 mtvsrdd 32+1, 10, 9 # r1, r0
863 mtvsrdd 32+2, 19, 25 # s1
864 mtvsrdd 32+3, 9, 25 # r0
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
882 vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
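A minimal C sketch (illustrative, not the kernel code) of the three accumulators the vmsumudm comments above describe, with h = h0 + h1*2^64 + h2*2^128, r = r0 + r1*2^64 and s1 = (r1 >> 2) * 5. The s1 trick works because 2^130 = 5 (mod p) and the clamped r1 is a multiple of 4, so the division by 4 is exact:

    static void poly1305_mul_64bit_limbs_sketch(unsigned long long h0,
                                                unsigned long long h1,
                                                unsigned long long h2,
                                                unsigned long long r0,
                                                unsigned long long r1,
                                                unsigned __int128 d[3])
    {
            unsigned long long s1 = (r1 >> 2) * 5;

            d[0] = (unsigned __int128)h0 * r0 + (unsigned __int128)h1 * s1;  /* weight 2^0   */
            d[1] = (unsigned __int128)h0 * r1 + (unsigned __int128)h1 * r0
                 + (unsigned __int128)h2 * s1;                               /* weight 2^64  */
            d[2] = (unsigned __int128)h2 * r0;                               /* weight 2^128 */
    }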
923 # - no highbit if final leftover block (highbit = 0)
931 stdu 1,-400(1)
973 vxor 9, 9, 9
1039 # h + 5 + (-p)
1046 srdi 9, 8, 2 # overflow?
1047 cmpdi 9, 0
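A C sketch (illustrative, not the kernel code) of the final reduction the "# h + 5 + (-p)" comment refers to, assuming h = h[0] + h[1]*2^64 + h[2]*2^128 is already partially reduced: add 5, and if the sum carries out of bit 130 then h >= p, so keep h + 5 - 2^130 (which equals h - p); otherwise keep h unchanged. The srdi/cmpdi pair above corresponds to that carry-out-of-bit-130 test.

    static void poly1305_final_reduce_sketch(unsigned long long h[3])
    {
            unsigned __int128 t;
            unsigned long long g0, g1, g2;

            t  = (unsigned __int128)h[0] + 5;
            g0 = (unsigned long long)t;
            t  = (unsigned __int128)h[1] + (unsigned long long)(t >> 64);
            g1 = (unsigned long long)t;
            g2 = h[2] + (unsigned long long)(t >> 64);

            if (g2 >> 2) {          /* carry out of bit 130: h >= p */
                    h[0] = g0;
                    h[1] = g1;
                    h[2] = g2 & 3;
            }
    }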