Lines Matching +full:16 +full:- +full:17
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 # Accelerated AES-GCM stitched implementation for ppc64le.
5 # Copyright 2022- IBM Inc. All rights reserved
22 # Hash keys = v3 - v14
29 # v31 - counter 1
32 # vs0 - vs14 for round keys
35 # This implementation uses a stitched AES-GCM approach to improve overall performance.
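
The "stitching" named here interleaves the AES round work for the next batch of counter blocks with the GHASH folding of the previous batch of ciphertext blocks, so the vcipher and vpmsumd pipelines run in parallel instead of back to back. A minimal C sketch of the idea, not the kernel's code; aes_round() and clmul_partial() are hypothetical stand-ins for vcipher and one vpmsumd partial product:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } block;

    /* hypothetical stand-in for one vcipher round */
    static block aes_round(block s, block rk)
    {
        s.hi ^= rk.hi; s.lo ^= rk.lo;   /* placeholder transform */
        return s;
    }

    /* hypothetical stand-in for one vpmsumd partial product */
    static block clmul_partial(block c, block hpow)
    {
        block r = { c.hi ^ hpow.hi, c.lo ^ hpow.lo };  /* placeholder */
        return r;
    }

    /* Each iteration advances one AES round on all 8 counter blocks and
     * folds one more partial GHASH product of the previous batch. */
    static void stitched_step(block st[8], const block rk[8],
                              const block prev_ct[8], const block hpow[8],
                              block *acc)
    {
        for (int r = 0; r < 8; r++) {
            for (int b = 0; b < 8; b++)
                st[b] = aes_round(st[b], rk[r]);        /* AES pipeline */
            block p = clmul_partial(prev_ct[r], hpow[r]);
            acc->hi ^= p.hi; acc->lo ^= p.lo;           /* GHASH pipeline */
        }
    }
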
48 # v15 - v18 - input states
49 # vs1 - vs9 - round keys
58 vcipher 16, 16, 19
59 vcipher 17, 17, 19
63 vcipher 16, 16, 20
64 vcipher 17, 17, 20
68 vcipher 16, 16, 21
69 vcipher 17, 17, 21
73 vcipher 16, 16, 22
74 vcipher 17, 17, 22
83 vcipher 16, 16, 19
84 vcipher 17, 17, 19
88 vcipher 16, 16, 20
89 vcipher 17, 17, 20
93 vcipher 16, 16, 21
94 vcipher 17, 17, 21
98 vcipher 16, 16, 22
99 vcipher 17, 17, 22
104 vcipher 16, 16, 23
105 vcipher 17, 17, 23
110 # v15 - v22 - input states
111 # vs1 - vs9 - round keys
120 vcipher 16, 16, 23
121 vcipher 17, 17, 23
129 vcipher 16, 16, 24
130 vcipher 17, 17, 24
138 vcipher 16, 16, 25
139 vcipher 17, 17, 25
147 vcipher 16, 16, 26
148 vcipher 17, 17, 26
161 vcipher 16, 16, 23
162 vcipher 17, 17, 23
170 vcipher 16, 16, 24
171 vcipher 17, 17, 24
179 vcipher 16, 16, 25
180 vcipher 17, 17, 25
188 vcipher 16, 16, 26
189 vcipher 17, 17, 26
198 vcipher 16, 16, 23
199 vcipher 17, 17, 23
239 vpmsumd 24, 9, 16
240 vpmsumd 25, 6, 17
248 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
249 vpmsumd 26, 7, 17
268 vpmsumd 25, 11, 16
269 vpmsumd 26, 8, 17
290 # v15 - v22 - input blocks
297 vpmsumd 24, 9, 16
298 vpmsumd 25, 6, 17
306 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
307 vpmsumd 26, 7, 17
327 vpmsumd 25, 11, 16
328 vpmsumd 26, 8, 17
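
The vpmsumd runs above assemble a 128x128-bit carry-less multiply from 64-bit halves; the "H3.L * X1.H + H3.H * X1.L" comments mark the cross term. With X = X.H*x^64 ^ X.L and H likewise, X*H = (X.H*H.H)*x^128 ^ (X.H*H.L ^ X.L*H.H)*x^64 ^ (X.L*H.L). A scalar sketch with a hypothetical clmul64() helper; vpmsumd computes two such 64x64 products and XORs them in a single instruction:

    #include <stdint.h>

    /* 64x64 -> 128-bit carry-less multiply, bit by bit */
    static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
    {
        *hi = *lo = 0;
        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                *lo ^= a << i;
                if (i)
                    *hi ^= a >> (64 - i);
            }
        }
    }

    /* the cross term named in the comments: X.H*H.L ^ X.L*H.H */
    static void gf_cross(uint64_t xh, uint64_t xl, uint64_t hh, uint64_t hl,
                         uint64_t *mid_hi, uint64_t *mid_lo)
    {
        uint64_t ahi, alo, bhi, blo;
        clmul64(xh, hl, &ahi, &alo);
        clmul64(xl, hh, &bhi, &blo);
        *mid_hi = ahi ^ bhi;
        *mid_lo = alo ^ blo;
    }
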
428 stdu 1,-640(1)
433 std 16,128(1)
434 std 17,136(1)
441 addi 9, 9, 16
443 addi 9, 9, 16
445 addi 9, 9, 16
447 addi 9, 9, 16
449 addi 9, 9, 16
451 addi 9, 9, 16
453 addi 9, 9, 16
455 addi 9, 9, 16
457 addi 9, 9, 16
459 addi 9, 9, 16
461 addi 9, 9, 16
465 stxv 16, 496(1)
466 stxv 17, 512(1)
478 lxv 16, 496(1)
479 lxv 17, 512(1)
487 addi 9, 9, 16
489 addi 9, 9, 16
491 addi 9, 9, 16
493 addi 9, 9, 16
495 addi 9, 9, 16
497 addi 9, 9, 16
499 addi 9, 9, 16
501 addi 9, 9, 16
503 addi 9, 9, 16
505 addi 9, 9, 16
507 addi 9, 9, 16
513 ld 16,128(1)
514 ld 17,136(1)
528 # load Hash - h^4, h^3, h^2, h
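
Keeping h^4, h^3, h^2, h loaded lets the loop fold four ciphertext blocks per reduction using the standard aggregated identity: ((((Y ^ C1)*H ^ C2)*H ^ C3)*H ^ C4)*H = (Y ^ C1)*H^4 ^ C2*H^3 ^ C3*H^2 ^ C4*H. A sketch assuming a gfmul() helper (not defined here) for multiplication in GF(2^128):

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } block;

    /* assumed helper: carry-less multiply plus reduction in GF(2^128) */
    extern block gfmul(block a, block b);

    static block xor128(block a, block b)
    {
        a.hi ^= b.hi; a.lo ^= b.lo;
        return a;
    }

    /* hpow[] = { H^4, H^3, H^2, H }, c[] = four ciphertext blocks */
    static block ghash4(block y, const block c[4], const block hpow[4])
    {
        block acc = gfmul(xor128(y, c[0]), hpow[0]); /* (Y ^ C1) * H^4 */
        acc = xor128(acc, gfmul(c[1], hpow[1]));     /* ^ C2 * H^3 */
        acc = xor128(acc, gfmul(c[2], hpow[2]));     /* ^ C3 * H^2 */
        acc = xor128(acc, gfmul(c[3], hpow[3]));     /* ^ C4 * H   */
        return acc;
    }
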
562 # const char *rk, unsigned char iv[16], void *Xip);
564 # r3 - inp
565 # r4 - out
566 # r5 - len
567 # r6 - AES round keys
568 # r7 - iv and other data
569 # r8 - Xi, HPoli, hash keys
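
File line 562 is the tail of the prototype comment; a plausible reconstruction follows (the symbol name is not visible in this listing; aes_p10_gcm_encrypt is the name used in the surrounding kernel tree). The r3..r8 mapping matches the ppc64le ELFv2 convention of passing the first arguments in r3 upward:

    #include <stddef.h>

    void aes_p10_gcm_encrypt(const unsigned char *inp, /* r3 */
                             unsigned char *out,       /* r4 */
                             size_t len,               /* r5 */
                             const char *rk,           /* r6: AES round keys */
                             unsigned char iv[16],     /* r7 */
                             void *Xip);               /* r8: Xi, HPoli, hash keys */
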
581 # initialize ICB: GHASH( IV ), IV - r7
582 lxvb16x 30+32, 0, 7 # load IV - v30
605 # load rounds - 10 (128), 12 (192), 14 (256)
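
The 10/12/14 mapping is the FIPS 197 round count, Nr = key bits / 32 + 6:

    /* AES round count per FIPS 197: 128 -> 10, 192 -> 12, 256 -> 14 */
    static inline int aes_rounds(int key_bits)
    {
        return key_bits / 32 + 6;
    }
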
611 vxor 15, 30, 29 # IV + round key - add round key 0
644 cmpdi 15, 16
650 divdu 10, 12, 10 # n = number of 128-byte blocks
655 vxor 16, 30, 29
657 vxor 17, 30, 29
671 li 15, 16
672 li 16, 32
673 li 17, 48
684 lxvb16x 16, 15, 14 # load block
685 lxvb16x 17, 16, 14 # load block
686 lxvb16x 18, 17, 14 # load block
704 vcipher 16, 16, 23
705 vcipher 17, 17, 23
713 vcipher 16, 16, 24
714 vcipher 17, 17, 24
730 vcipher 16, 16, 23
731 vcipher 17, 17, 23
739 vcipher 16, 16, 24
740 vcipher 17, 17, 24
758 vcipherlast 16, 16, 23
762 xxlxor 48, 48, 16
765 vcipherlast 17, 17, 23
768 xxlxor 49, 49, 17
769 stxvb16x 49, 16, 9 # store output
771 stxvb16x 50, 17, 9 # store output
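
vcipherlast applies the final AES round (no MixColumns) to each counter block, and the xxlxor/stxvb16x pair XORs the resulting keystream into the input block and stores it big-endian. One lane in C terms, with aes_final_round() as a hypothetical stand-in for vcipherlast:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } block;

    /* hypothetical stand-in for vcipherlast */
    static block aes_final_round(block s, block rk)
    {
        s.hi ^= rk.hi; s.lo ^= rk.lo;   /* placeholder transform */
        return s;
    }

    /* out = in ^ E(counter): the xxlxor + stxvb16x step, per block */
    static block ctr_lane(block in, block state, block last_rk)
    {
        block ks = aes_final_round(state, last_rk);
        in.hi ^= ks.hi;
        in.lo ^= ks.lo;
        return in;
    }
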
799 vxor 16, 30, 27
801 vxor 17, 30, 27
813 addi 12, 12, -128
826 li 10, 16
833 cmpdi 12, 16
873 addi 14, 14, 16
874 addi 9, 9, 16
879 addi 12, 12, -16
880 addi 11, 11, 16
944 li 15, 16
947 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
948 vspltisb 17, 0 # second 16 bytes - 0x0000...00
950 stvx 16, 10, 1
951 addi 10, 10, 16
952 stvx 17, 10, 1
955 lxvb16x 16, 15, 10 # load partial block mask
956 xxland 47, 47, 16
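
The two vspltisb stores lay out 16 bytes of 0xff followed by 16 bytes of 0x00 on the stack; loading a 16-byte window at an offset then yields a mask with the wanted run of 0xff bytes (the offset register is set between the lines shown here). A sketch of the trick, assuming the offset is 16 - n for n valid bytes:

    #include <stdint.h>
    #include <string.h>

    /* Build a mask with n leading 0xff bytes by sliding a 16-byte window
     * over a 0xff/0x00 split buffer, as vspltisb/stvx/lxvb16x do above. */
    static void partial_mask(uint8_t mask[16], unsigned n /* 0..16 */)
    {
        uint8_t buf[32];
        memset(buf, 0xff, 16);            /* vspltisb 16, -1 */
        memset(buf + 16, 0x00, 16);       /* vspltisb 17, 0  */
        memcpy(mask, buf + (16 - n), 16); /* lxvb16x at offset 16 - n (assumed) */
    }
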
966 li 16, 16
969 stxvb16x 32, 16, 8 # write out Xi
976 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
977 vspltisb 17, 0 # second 16 bytes - 0x0000...00
979 stxvb16x 17+32, 10, 1
981 stxvb16x 16+32, 10, 1
983 stxvb16x 17+32, 10, 1
994 add 17, 15, 5
995 cmpdi 17, 16
1001 li 16, 16
1002 GEN_MASK 18, 15, 16
1005 lxvb16x 17+32, 0, 14 # load last block
1006 sldi 16, 15, 3
1007 mtvsrdd 32+16, 0, 16
1008 vsro 17, 17, 16
1009 xxlxor 47, 47, 17+32
1017 xxland 32+28, 32+17, 18
1023 li 16, 16
1024 lxvb16x 32+29, 16, 8
1027 stxvb16x 32, 16, 8 # save Xi
1031 sldi 16, 15, 3
1032 mtvsrdd 32+16, 0, 16
1033 vslo 15, 15, 16
1036 li 16, 16
1037 sub 17, 16, 15 # 16 - partial
1039 add 16, 15, 5
1040 cmpdi 16, 16
1042 mr 17, 5
1049 addi 10, 9, -1
1050 addi 16, 1, 191
1051 mtctr 17 # move partial byte count
1054 lbzu 18, 1(16)
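
The mtctr/lbzu pair sets up a byte-at-a-time copy of the partial block from the stack staging buffer to the output (the matching stbu falls outside the listed lines). In C terms, roughly:

    /* byte copy driven by the CTR register; buf is the stack staging area */
    static void copy_partial(unsigned char *out, const unsigned char *buf,
                             unsigned partial_len)
    {
        for (unsigned i = 0; i < partial_len; i++)
            out[i] = buf[i];
    }
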
1059 add 14, 14, 17
1060 add 9, 9, 17
1061 sub 12, 12, 17
1062 add 11, 11, 17
1065 cmpdi 15, 16
1071 vxor 15, 30, 29 # IV + round key - add round key 0
1073 std 15, 56(7) # partial done - clear
1083 # r9 - output
1084 # r12 - remaining bytes
1085 # v15 - partial input data
1091 addi 10, 9, -1
1092 addi 16, 1, 191
1098 lbzu 14, 1(16)
1122 # initialize ICB: GHASH( IV ), IV - r7
1123 lxvb16x 30+32, 0, 7 # load IV - v30
1146 # load rounds - 10 (128), 12 (192), 14 (256)
1152 vxor 15, 30, 29 # IV + round key - add round key 0
1185 cmpdi 15, 16
1191 divdu 10, 12, 10 # n = number of 128-byte blocks
1196 vxor 16, 30, 29
1198 vxor 17, 30, 29
1212 li 15, 16
1213 li 16, 32
1214 li 17, 48
1225 lxvb16x 16, 15, 14 # load block
1226 lxvb16x 17, 16, 14 # load block
1227 lxvb16x 18, 17, 14 # load block
1245 vcipher 16, 16, 23
1246 vcipher 17, 17, 23
1254 vcipher 16, 16, 24
1255 vcipher 17, 17, 24
1271 vcipher 16, 16, 23
1272 vcipher 17, 17, 23
1280 vcipher 16, 16, 24
1281 vcipher 17, 17, 24
1299 vcipherlast 16, 16, 23
1303 xxlxor 48, 48, 16
1306 vcipherlast 17, 17, 23
1309 xxlxor 49, 49, 17
1310 stxvb16x 49, 16, 9 # store output
1312 stxvb16x 50, 17, 9 # store output
1333 xxlor 16+32, 16, 16
1334 xxlor 17+32, 17, 17
1349 vxor 16, 30, 27
1351 vxor 17, 30, 27
1363 addi 12, 12, -128
1376 li 10, 16
1383 cmpdi 12, 16
1423 addi 14, 14, 16
1424 addi 9, 9, 16
1430 addi 12, 12, -16
1431 addi 11, 11, 16
1494 li 15, 16
1497 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1498 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1500 stvx 16, 10, 1
1501 addi 10, 10, 16
1502 stvx 17, 10, 1
1505 lxvb16x 16, 15, 10 # load partial block mask
1506 xxland 47, 47, 16
1508 xxland 32+28, 15, 16
1517 li 16, 16
1520 stxvb16x 32, 16, 8 # write out Xi