1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2# 3# Accelerated AES-GCM stitched implementation for ppc64le. 4# 5# Copyright 2024- IBM Inc. 6# 7#=================================================================================== 8# Written by Danny Tsen <dtsen@us.ibm.com> 9# 10# GHASH is based on the Karatsuba multiplication method. 11# 12# Xi xor X1 13# 14# X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H = 15# (X1.h * H4.h + xX.l * H4.l + X1 * H4) + 16# (X2.h * H3.h + X2.l * H3.l + X2 * H3) + 17# (X3.h * H2.h + X3.l * H2.l + X3 * H2) + 18# (X4.h * H.h + X4.l * H.l + X4 * H) 19# 20# Xi = v0 21# H Poly = v2 22# Hash keys = v3 - v14 23# ( H.l, H, H.h) 24# ( H^2.l, H^2, H^2.h) 25# ( H^3.l, H^3, H^3.h) 26# ( H^4.l, H^4, H^4.h) 27# 28# v30 is IV 29# v31 - counter 1 30# 31# AES used, 32# vs0 - round key 0 33# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) 34# 35# This implementation uses stitched AES-GCM approach to improve overall performance. 36# AES is implemented with 8x blocks and GHASH is using 2 4x blocks. 37# 38# =================================================================================== 39# 40 41#include <asm/ppc_asm.h> 42#include <linux/linkage.h> 43 44.machine "any" 45.text 46 47.macro SAVE_GPR GPR OFFSET FRAME 48 std \GPR,\OFFSET(\FRAME) 49.endm 50 51.macro SAVE_VRS VRS OFFSET FRAME 52 stxv \VRS+32, \OFFSET(\FRAME) 53.endm 54 55.macro RESTORE_GPR GPR OFFSET FRAME 56 ld \GPR,\OFFSET(\FRAME) 57.endm 58 59.macro RESTORE_VRS VRS OFFSET FRAME 60 lxv \VRS+32, \OFFSET(\FRAME) 61.endm 62 63.macro SAVE_REGS 64 mflr 0 65 std 0, 16(1) 66 stdu 1,-512(1) 67 68 SAVE_GPR 14, 112, 1 69 SAVE_GPR 15, 120, 1 70 SAVE_GPR 16, 128, 1 71 SAVE_GPR 17, 136, 1 72 SAVE_GPR 18, 144, 1 73 SAVE_GPR 19, 152, 1 74 SAVE_GPR 20, 160, 1 75 SAVE_GPR 21, 168, 1 76 SAVE_GPR 22, 176, 1 77 SAVE_GPR 23, 184, 1 78 SAVE_GPR 24, 192, 1 79 80 addi 9, 1, 256 81 SAVE_VRS 20, 0, 9 82 SAVE_VRS 21, 16, 9 83 SAVE_VRS 22, 32, 9 84 SAVE_VRS 23, 48, 9 85 SAVE_VRS 24, 64, 9 86 SAVE_VRS 25, 80, 9 87 SAVE_VRS 26, 96, 9 88 SAVE_VRS 27, 112, 9 89 SAVE_VRS 28, 128, 9 90 SAVE_VRS 29, 144, 9 91 SAVE_VRS 30, 160, 9 92 SAVE_VRS 31, 176, 9 93.endm # SAVE_REGS 94 95.macro RESTORE_REGS 96 addi 9, 1, 256 97 RESTORE_VRS 20, 0, 9 98 RESTORE_VRS 21, 16, 9 99 RESTORE_VRS 22, 32, 9 100 RESTORE_VRS 23, 48, 9 101 RESTORE_VRS 24, 64, 9 102 RESTORE_VRS 25, 80, 9 103 RESTORE_VRS 26, 96, 9 104 RESTORE_VRS 27, 112, 9 105 RESTORE_VRS 28, 128, 9 106 RESTORE_VRS 29, 144, 9 107 RESTORE_VRS 30, 160, 9 108 RESTORE_VRS 31, 176, 9 109 110 RESTORE_GPR 14, 112, 1 111 RESTORE_GPR 15, 120, 1 112 RESTORE_GPR 16, 128, 1 113 RESTORE_GPR 17, 136, 1 114 RESTORE_GPR 18, 144, 1 115 RESTORE_GPR 19, 152, 1 116 RESTORE_GPR 20, 160, 1 117 RESTORE_GPR 21, 168, 1 118 RESTORE_GPR 22, 176, 1 119 RESTORE_GPR 23, 184, 1 120 RESTORE_GPR 24, 192, 1 121 122 addi 1, 1, 512 123 ld 0, 16(1) 124 mtlr 0 125.endm # RESTORE_REGS 126 127# 4x loops 128.macro AES_CIPHER_4x _VCIPHER ST r 129 \_VCIPHER \ST, \ST, \r 130 \_VCIPHER \ST+1, \ST+1, \r 131 \_VCIPHER \ST+2, \ST+2, \r 132 \_VCIPHER \ST+3, \ST+3, \r 133.endm 134 135# 8x loops 136.macro AES_CIPHER_8x _VCIPHER ST r 137 \_VCIPHER \ST, \ST, \r 138 \_VCIPHER \ST+1, \ST+1, \r 139 \_VCIPHER \ST+2, \ST+2, \r 140 \_VCIPHER \ST+3, \ST+3, \r 141 \_VCIPHER \ST+4, \ST+4, \r 142 \_VCIPHER \ST+5, \ST+5, \r 143 \_VCIPHER \ST+6, \ST+6, \r 144 \_VCIPHER \ST+7, \ST+7, \r 145.endm 146 147.macro LOOP_8AES_STATE 148 xxlor 32+23, 1, 1 149 xxlor 32+24, 2, 2 150 xxlor 32+25, 3, 3 151 xxlor 32+26, 4, 4 152 AES_CIPHER_8x vcipher, 15, 23 153 AES_CIPHER_8x vcipher, 15, 24 154 AES_CIPHER_8x vcipher, 15, 25 155 AES_CIPHER_8x vcipher, 15, 26 156 xxlor 32+23, 5, 5 157 xxlor 32+24, 6, 6 158 xxlor 32+25, 7, 7 159 xxlor 32+26, 8, 8 160 AES_CIPHER_8x vcipher, 15, 23 161 AES_CIPHER_8x vcipher, 15, 24 162 AES_CIPHER_8x vcipher, 15, 25 163 AES_CIPHER_8x vcipher, 15, 26 164.endm 165 166# 167# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on Karatsuba method. 168# H: returning digest 169# S#: states 170# 171# S1 should xor with the previous digest 172# 173# Xi = v0 174# H Poly = v2 175# Hash keys = v3 - v14 176# Scratch: v23 - v29 177# 178.macro PPC_GHASH4x H S1 S2 S3 S4 179 180 vpmsumd 23, 12, \S1 # H4.L * X.L 181 vpmsumd 24, 9, \S2 182 vpmsumd 25, 6, \S3 183 vpmsumd 26, 3, \S4 184 185 vpmsumd 27, 13, \S1 # H4.L * X.H + H4.H * X.L 186 vpmsumd 28, 10, \S2 # H3.L * X1.H + H3.H * X1.L 187 188 vxor 23, 23, 24 189 vxor 23, 23, 25 190 vxor 23, 23, 26 # L 191 192 vxor 24, 27, 28 193 vpmsumd 25, 7, \S3 194 vpmsumd 26, 4, \S4 195 196 vxor 24, 24, 25 197 vxor 24, 24, 26 # M 198 199 # sum hash and reduction with H Poly 200 vpmsumd 28, 23, 2 # reduction 201 202 vxor 1, 1, 1 203 vsldoi 25, 24, 1, 8 # mL 204 vsldoi 1, 1, 24, 8 # mH 205 vxor 23, 23, 25 # mL + L 206 207 # This performs swap and xor like, 208 # vsldoi 23, 23, 23, 8 # swap 209 # vxor 23, 23, 28 210 xxlor 32+25, 10, 10 211 vpermxor 23, 23, 28, 25 212 213 vpmsumd 26, 14, \S1 # H4.H * X.H 214 vpmsumd 27, 11, \S2 215 vpmsumd 28, 8, \S3 216 vpmsumd 29, 5, \S4 217 218 vxor 24, 26, 27 219 vxor 24, 24, 28 220 vxor 24, 24, 29 221 222 vxor 24, 24, 1 223 224 # sum hash and reduction with H Poly 225 vsldoi 25, 23, 23, 8 # swap 226 vpmsumd 23, 23, 2 227 vxor 27, 25, 24 228 vxor \H, 23, 27 229.endm 230 231# 232# Compute update single ghash 233# scratch: v1, v22..v27 234# 235.macro PPC_GHASH1x H S1 236 237 vxor 1, 1, 1 238 239 vpmsumd 22, 3, \S1 # L 240 vpmsumd 23, 4, \S1 # M 241 vpmsumd 24, 5, \S1 # H 242 243 vpmsumd 27, 22, 2 # reduction 244 245 vsldoi 25, 23, 1, 8 # mL 246 vsldoi 26, 1, 23, 8 # mH 247 vxor 22, 22, 25 # LL + LL 248 vxor 24, 24, 26 # HH + HH 249 250 xxlor 32+25, 10, 10 251 vpermxor 22, 22, 27, 25 252 253 vsldoi 23, 22, 22, 8 # swap 254 vpmsumd 22, 22, 2 # reduction 255 vxor 23, 23, 24 256 vxor \H, 22, 23 257.endm 258 259# 260# LOAD_HASH_TABLE 261# Xi = v0 262# H Poly = v2 263# Hash keys = v3 - v14 264# 265.macro LOAD_HASH_TABLE 266 # Load Xi 267 lxvb16x 32, 0, 8 # load Xi 268 269 # load Hash - h^4, h^3, h^2, h 270 li 10, 32 271 lxvd2x 2+32, 10, 8 # H Poli 272 li 10, 48 273 lxvd2x 3+32, 10, 8 # Hl 274 li 10, 64 275 lxvd2x 4+32, 10, 8 # H 276 li 10, 80 277 lxvd2x 5+32, 10, 8 # Hh 278 279 li 10, 96 280 lxvd2x 6+32, 10, 8 # H^2l 281 li 10, 112 282 lxvd2x 7+32, 10, 8 # H^2 283 li 10, 128 284 lxvd2x 8+32, 10, 8 # H^2h 285 286 li 10, 144 287 lxvd2x 9+32, 10, 8 # H^3l 288 li 10, 160 289 lxvd2x 10+32, 10, 8 # H^3 290 li 10, 176 291 lxvd2x 11+32, 10, 8 # H^3h 292 293 li 10, 192 294 lxvd2x 12+32, 10, 8 # H^4l 295 li 10, 208 296 lxvd2x 13+32, 10, 8 # H^4 297 li 10, 224 298 lxvd2x 14+32, 10, 8 # H^4h 299.endm 300 301################################################################################ 302# Compute AES and ghash one block at a time. 303# r23: AES rounds 304# v30: current IV 305# vs0: roundkey 0 306# 307################################################################################ 308SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x) 309 310 cmpdi 5, 16 311 bge __More_1x 312 blr 313__More_1x: 314 li 10, 16 315 divdu 12, 5, 10 316 317 xxlxor 32+15, 32+30, 0 318 319 # Pre-load 8 AES rounds to scratch vectors. 320 xxlor 32+16, 1, 1 321 xxlor 32+17, 2, 2 322 xxlor 32+18, 3, 3 323 xxlor 32+19, 4, 4 324 xxlor 32+20, 5, 5 325 xxlor 32+21, 6, 6 326 xxlor 32+28, 7, 7 327 xxlor 32+29, 8, 8 328 lwz 23, 240(6) # n rounds 329 addi 22, 23, -9 # remaing AES rounds 330 331 cmpdi 12, 0 332 bgt __Loop_1x 333 blr 334 335__Loop_1x: 336 mtctr 22 337 addi 10, 6, 144 338 vcipher 15, 15, 16 339 vcipher 15, 15, 17 340 vcipher 15, 15, 18 341 vcipher 15, 15, 19 342 vcipher 15, 15, 20 343 vcipher 15, 15, 21 344 vcipher 15, 15, 28 345 vcipher 15, 15, 29 346 347__Loop_aes_1state: 348 lxv 32+1, 0(10) 349 vcipher 15, 15, 1 350 addi 10, 10, 16 351 bdnz __Loop_aes_1state 352 lxv 32+1, 0(10) # last round key 353 lxvb16x 11, 0, 14 # load input block 354 vcipherlast 15, 15, 1 355 356 xxlxor 32+15, 32+15, 11 357 stxvb16x 32+15, 0, 9 # store output 358 addi 14, 14, 16 359 addi 9, 9, 16 360 361 cmpdi 24, 0 # decrypt? 362 bne __Encrypt_1x 363 xxlor 15+32, 11, 11 364__Encrypt_1x: 365 vxor 15, 15, 0 366 PPC_GHASH1x 0, 15 367 368 addi 5, 5, -16 369 addi 11, 11, 16 370 371 vadduwm 30, 30, 31 # IV + counter 372 xxlxor 32+15, 32+30, 0 373 addi 12, 12, -1 374 cmpdi 12, 0 375 bgt __Loop_1x 376 377 stxvb16x 32+30, 0, 7 # update IV 378 stxvb16x 32+0, 0, 8 # update Xi 379 blr 380SYM_FUNC_END(aes_gcm_crypt_1x) 381 382################################################################################ 383# Process a normal partial block when we come here. 384# Compute partial mask, Load and store partial block to stack. 385# Update partial_len and pblock. 386# pblock is (encrypted ^ AES state) for encrypt 387# and (input ^ AES state) for decrypt. 388# 389################################################################################ 390SYM_FUNC_START_LOCAL(__Process_partial) 391 392 # create partial mask 393 vspltisb 16, -1 394 li 12, 16 395 sub 12, 12, 5 396 sldi 12, 12, 3 397 mtvsrdd 32+17, 0, 12 398 vslo 16, 16, 17 # partial block mask 399 400 lxvb16x 11, 0, 14 # load partial block 401 xxland 11, 11, 32+16 402 403 # AES crypt partial 404 xxlxor 32+15, 32+30, 0 405 lwz 23, 240(6) # n rounds 406 addi 22, 23, -1 # loop - 1 407 mtctr 22 408 addi 10, 6, 16 409 410__Loop_aes_pstate: 411 lxv 32+1, 0(10) 412 vcipher 15, 15, 1 413 addi 10, 10, 16 414 bdnz __Loop_aes_pstate 415 lxv 32+1, 0(10) # last round key 416 vcipherlast 15, 15, 1 417 418 xxlxor 32+15, 32+15, 11 419 vand 15, 15, 16 420 421 # AES crypt output v15 422 # Write partial 423 li 10, 224 424 stxvb16x 15+32, 10, 1 # write v15 to stack 425 addi 10, 1, 223 426 addi 12, 9, -1 427 mtctr 5 # partial block len 428__Write_partial: 429 lbzu 22, 1(10) 430 stbu 22, 1(12) 431 bdnz __Write_partial 432 433 cmpdi 24, 0 # decrypt? 434 bne __Encrypt_partial 435 xxlor 32+15, 11, 11 # decrypt using the input block 436__Encrypt_partial: 437 #vxor 15, 15, 0 # ^ previous hash 438 #PPC_GHASH1x 0, 15 439 440 add 14, 14, 5 441 add 9, 9, 5 442 std 5, 56(7) # update partial 443 sub 11, 11, 5 444 li 5, 0 # done last byte 445 446 # 447 # Don't increase IV since this is the last partial. 448 # It should get updated in gcm_update if no more data blocks. 449 #vadduwm 30, 30, 31 # increase IV 450 stxvb16x 32+30, 0, 7 # update IV 451 li 10, 64 452 stxvb16x 32+0, 0, 8 # Update X1 453 stxvb16x 32+15, 10, 7 # Update pblock 454 blr 455SYM_FUNC_END(__Process_partial) 456 457################################################################################ 458# Combine partial blocks and ghash when we come here. 459# 460# The partial block has to be shifted to the right location to encrypt/decrypt 461# and compute ghash if combing the previous partial block is needed. 462# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV. 463# Write Xi. 464# - Don't compute ghash if not full block. gcm_update will take care of it 465# is the last block. Update Partial_len and pblock. 466# 467################################################################################ 468SYM_FUNC_START_LOCAL(__Combine_partial) 469 470 ld 12, 56(7) 471 mr 21, 5 # these bytes to be processed 472 473 li 17, 0 474 li 16, 16 475 sub 22, 16, 12 # bytes to complete a block 476 sub 17, 22, 5 # remaining bytes in a block 477 cmpdi 5, 16 478 ble __Inp_msg_less16 479 li 17, 0 480 mr 21, 22 481 b __Combine_continue 482__Inp_msg_less16: 483 cmpd 22, 5 484 bgt __Combine_continue 485 li 17, 0 486 mr 21, 22 # these bytes to be processed 487 488__Combine_continue: 489 # load msg and shift to the proper location and mask 490 vspltisb 16, -1 491 sldi 15, 12, 3 492 mtvsrdd 32+17, 0, 15 493 vslo 16, 16, 17 494 vsro 16, 16, 17 495 sldi 15, 17, 3 496 mtvsrdd 32+17, 0, 15 497 vsro 16, 16, 17 498 vslo 16, 16, 17 # mask 499 500 lxvb16x 32+19, 0, 14 # load partial block 501 sldi 15, 12, 3 502 mtvsrdd 32+17, 0, 15 503 vsro 19, 19, 17 # 0x00..xxxx??..?? 504 sldi 15, 17, 3 505 mtvsrdd 32+17, 0, 15 506 vsro 19, 19, 17 # 0x00..xxxx 507 vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00 508 509 # AES crypt partial 510 xxlxor 32+15, 32+30, 0 511 lwz 23, 240(6) # n rounds 512 addi 22, 23, -1 # loop - 1 513 mtctr 22 514 addi 10, 6, 16 515 516__Loop_aes_cpstate: 517 lxv 32+1, 0(10) 518 vcipher 15, 15, 1 519 addi 10, 10, 16 520 bdnz __Loop_aes_cpstate 521 lxv 32+1, 0(10) # last round key 522 vcipherlast 15, 15, 1 523 524 vxor 15, 15, 19 525 vand 15, 15, 16 526 527 # AES crypt output v15 528 # Write partial 529 li 10, 224 530 stxvb16x 15+32, 10, 1 # write v15 to stack 531 addi 10, 1, 223 532 add 10, 10, 12 # add offset 533 addi 15, 9, -1 534 mtctr 21 # partial block len 535__Write_combine_partial: 536 lbzu 22, 1(10) 537 stbu 22, 1(15) 538 bdnz __Write_combine_partial 539 540 add 14, 14, 21 541 add 11, 11, 21 542 add 9, 9, 21 543 sub 5, 5, 21 544 545 # Encrypt/Decrypt? 546 cmpdi 24, 0 # decrypt? 547 bne __Encrypt_combine_partial 548 vmr 15, 19 # decrypt using the input block 549 550__Encrypt_combine_partial: 551 # 552 # Update partial flag and combine ghash. 553__Update_partial_ghash: 554 li 10, 64 555 lxvb16x 32+17, 10, 7 # load previous pblock 556 add 12, 12, 21 # combined pprocessed 557 vxor 15, 15, 17 # combined pblock 558 559 cmpdi 12, 16 560 beq __Clear_partial_flag 561 std 12, 56(7) # update partial len 562 stxvb16x 32+15, 10, 7 # Update current pblock 563 blr 564 565__Clear_partial_flag: 566 li 12, 0 567 std 12, 56(7) 568 # Update IV and ghash here 569 vadduwm 30, 30, 31 # increase IV 570 stxvb16x 32+30, 0, 7 # update IV 571 572 # v15 either is either (input blockor encrypted)^(AES state) 573 vxor 15, 15, 0 574 PPC_GHASH1x 0, 15 575 stxvb16x 32+0, 10, 7 # update pblock for debug? 576 stxvb16x 32+0, 0, 8 # update Xi 577 blr 578SYM_FUNC_END(__Combine_partial) 579 580################################################################################ 581# gcm_update(iv, Xi) - compute last hash 582# 583################################################################################ 584SYM_FUNC_START(gcm_update) 585 586 ld 10, 56(3) 587 cmpdi 10, 0 588 beq __no_update 589 590 lxvb16x 32, 0, 4 # load Xi 591 # load Hash - h^4, h^3, h^2, h 592 li 10, 32 593 lxvd2x 2+32, 10, 4 # H Poli 594 li 10, 48 595 lxvd2x 3+32, 10, 4 # Hl 596 li 10, 64 597 lxvd2x 4+32, 10, 4 # H 598 li 10, 80 599 lxvd2x 5+32, 10, 4 # Hh 600 601 addis 11, 2, permx@toc@ha 602 addi 11, 11, permx@toc@l 603 lxv 10, 0(11) # vs10: vpermxor vector 604 605 li 9, 64 606 lxvb16x 32+6, 9, 3 # load pblock 607 vxor 6, 6, 0 608 609 vxor 1, 1, 1 610 vpmsumd 12, 3, 6 # L 611 vpmsumd 13, 4, 6 # M 612 vpmsumd 14, 5, 6 # H 613 vpmsumd 17, 12, 2 # reduction 614 vsldoi 15, 13, 1, 8 # mL 615 vsldoi 16, 1, 13, 8 # mH 616 vxor 12, 12, 15 # LL + LL 617 vxor 14, 14, 16 # HH + HH 618 xxlor 32+15, 10, 10 619 vpermxor 12, 12, 17, 15 620 vsldoi 13, 12, 12, 8 # swap 621 vpmsumd 12, 12, 2 # reduction 622 vxor 13, 13, 14 623 vxor 7, 12, 13 624 625 #vxor 0, 0, 0 626 #stxvb16x 32+0, 9, 3 627 li 10, 0 628 std 10, 56(3) 629 stxvb16x 32+7, 0, 4 630 631__no_update: 632 blr 633SYM_FUNC_END(gcm_update) 634 635################################################################################ 636# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, 637# const char *rk, unsigned char iv[16], void *Xip); 638# 639# r3 - inp 640# r4 - out 641# r5 - len 642# r6 - AES round keys 643# r7 - iv and other data 644# r8 - Xi, HPoli, hash keys 645# 646# rounds is at offset 240 in rk 647# Xi is at 0 in gcm_table (Xip). 648# 649################################################################################ 650SYM_FUNC_START(aes_p10_gcm_encrypt) 651 652 cmpdi 5, 0 653 ble __Invalid_msg_len 654 655 SAVE_REGS 656 LOAD_HASH_TABLE 657 658 # initialize ICB: GHASH( IV ), IV - r7 659 lxvb16x 30+32, 0, 7 # load IV - v30 660 661 mr 14, 3 662 mr 9, 4 663 664 # counter 1 665 vxor 31, 31, 31 666 vspltisb 22, 1 667 vsldoi 31, 31, 22,1 # counter 1 668 669 addis 11, 2, permx@toc@ha 670 addi 11, 11, permx@toc@l 671 lxv 10, 0(11) # vs10: vpermxor vector 672 li 11, 0 673 674 # load 9 round keys to VSR 675 lxv 0, 0(6) # round key 0 676 lxv 1, 16(6) # round key 1 677 lxv 2, 32(6) # round key 2 678 lxv 3, 48(6) # round key 3 679 lxv 4, 64(6) # round key 4 680 lxv 5, 80(6) # round key 5 681 lxv 6, 96(6) # round key 6 682 lxv 7, 112(6) # round key 7 683 lxv 8, 128(6) # round key 8 684 685 # load rounds - 10 (128), 12 (192), 14 (256) 686 lwz 23, 240(6) # n rounds 687 li 24, 1 # encrypt 688 689__Process_encrypt: 690 # 691 # Process different blocks 692 # 693 ld 12, 56(7) 694 cmpdi 12, 0 695 bgt __Do_combine_enc 696 cmpdi 5, 128 697 blt __Process_more_enc 698 699# 700# Process 8x AES/GCM blocks 701# 702__Process_8x_enc: 703 # 8x blcoks 704 li 10, 128 705 divdu 12, 5, 10 # n 128 bytes-blocks 706 707 addi 12, 12, -1 # loop - 1 708 709 vmr 15, 30 # first state: IV 710 vadduwm 16, 15, 31 # state + counter 711 vadduwm 17, 16, 31 712 vadduwm 18, 17, 31 713 vadduwm 19, 18, 31 714 vadduwm 20, 19, 31 715 vadduwm 21, 20, 31 716 vadduwm 22, 21, 31 717 xxlor 9, 32+22, 32+22 # save last state 718 719 # vxor state, state, w # addroundkey 720 xxlor 32+29, 0, 0 721 vxor 15, 15, 29 # IV + round key - add round key 0 722 vxor 16, 16, 29 723 vxor 17, 17, 29 724 vxor 18, 18, 29 725 vxor 19, 19, 29 726 vxor 20, 20, 29 727 vxor 21, 21, 29 728 vxor 22, 22, 29 729 730 li 15, 16 731 li 16, 32 732 li 17, 48 733 li 18, 64 734 li 19, 80 735 li 20, 96 736 li 21, 112 737 738 # 739 # Pre-compute first 8 AES state and leave 1/3/5 more rounds 740 # for the loop. 741 # 742 addi 22, 23, -9 # process 8 keys 743 mtctr 22 # AES key loop 744 addi 10, 6, 144 745 746 LOOP_8AES_STATE # process 8 AES keys 747 748__PreLoop_aes_state: 749 lxv 32+1, 0(10) # round key 750 AES_CIPHER_8x vcipher 15 1 751 addi 10, 10, 16 752 bdnz __PreLoop_aes_state 753 lxv 32+1, 0(10) # last round key (v1) 754 755 cmpdi 12, 0 # Only one loop (8 block) 756 beq __Finish_ghash 757 758# 759# Loop 8x blocks and compute ghash 760# 761__Loop_8x_block_enc: 762 vcipherlast 15, 15, 1 763 vcipherlast 16, 16, 1 764 vcipherlast 17, 17, 1 765 vcipherlast 18, 18, 1 766 vcipherlast 19, 19, 1 767 vcipherlast 20, 20, 1 768 vcipherlast 21, 21, 1 769 vcipherlast 22, 22, 1 770 771 lxvb16x 32+23, 0, 14 # load block 772 lxvb16x 32+24, 15, 14 # load block 773 lxvb16x 32+25, 16, 14 # load block 774 lxvb16x 32+26, 17, 14 # load block 775 lxvb16x 32+27, 18, 14 # load block 776 lxvb16x 32+28, 19, 14 # load block 777 lxvb16x 32+29, 20, 14 # load block 778 lxvb16x 32+30, 21, 14 # load block 779 addi 14, 14, 128 780 781 vxor 15, 15, 23 782 vxor 16, 16, 24 783 vxor 17, 17, 25 784 vxor 18, 18, 26 785 vxor 19, 19, 27 786 vxor 20, 20, 28 787 vxor 21, 21, 29 788 vxor 22, 22, 30 789 790 stxvb16x 47, 0, 9 # store output 791 stxvb16x 48, 15, 9 # store output 792 stxvb16x 49, 16, 9 # store output 793 stxvb16x 50, 17, 9 # store output 794 stxvb16x 51, 18, 9 # store output 795 stxvb16x 52, 19, 9 # store output 796 stxvb16x 53, 20, 9 # store output 797 stxvb16x 54, 21, 9 # store output 798 addi 9, 9, 128 799 800 # ghash here 801 vxor 15, 15, 0 802 PPC_GHASH4x 0, 15, 16, 17, 18 803 804 vxor 19, 19, 0 805 PPC_GHASH4x 0, 19, 20, 21, 22 806 807 xxlor 32+15, 9, 9 # last state 808 vadduwm 15, 15, 31 # state + counter 809 vadduwm 16, 15, 31 810 vadduwm 17, 16, 31 811 vadduwm 18, 17, 31 812 vadduwm 19, 18, 31 813 vadduwm 20, 19, 31 814 vadduwm 21, 20, 31 815 vadduwm 22, 21, 31 816 xxlor 9, 32+22, 32+22 # save last state 817 818 xxlor 32+27, 0, 0 # restore roundkey 0 819 vxor 15, 15, 27 # IV + round key - add round key 0 820 vxor 16, 16, 27 821 vxor 17, 17, 27 822 vxor 18, 18, 27 823 vxor 19, 19, 27 824 vxor 20, 20, 27 825 vxor 21, 21, 27 826 vxor 22, 22, 27 827 828 addi 5, 5, -128 829 addi 11, 11, 128 830 831 LOOP_8AES_STATE # process 8 AES keys 832 mtctr 22 # AES key loop 833 addi 10, 6, 144 834__LastLoop_aes_state: 835 lxv 32+1, 0(10) # round key 836 AES_CIPHER_8x vcipher 15 1 837 addi 10, 10, 16 838 bdnz __LastLoop_aes_state 839 lxv 32+1, 0(10) # last round key (v1) 840 841 addi 12, 12, -1 842 cmpdi 12, 0 843 bne __Loop_8x_block_enc 844 845__Finish_ghash: 846 vcipherlast 15, 15, 1 847 vcipherlast 16, 16, 1 848 vcipherlast 17, 17, 1 849 vcipherlast 18, 18, 1 850 vcipherlast 19, 19, 1 851 vcipherlast 20, 20, 1 852 vcipherlast 21, 21, 1 853 vcipherlast 22, 22, 1 854 855 lxvb16x 32+23, 0, 14 # load block 856 lxvb16x 32+24, 15, 14 # load block 857 lxvb16x 32+25, 16, 14 # load block 858 lxvb16x 32+26, 17, 14 # load block 859 lxvb16x 32+27, 18, 14 # load block 860 lxvb16x 32+28, 19, 14 # load block 861 lxvb16x 32+29, 20, 14 # load block 862 lxvb16x 32+30, 21, 14 # load block 863 addi 14, 14, 128 864 865 vxor 15, 15, 23 866 vxor 16, 16, 24 867 vxor 17, 17, 25 868 vxor 18, 18, 26 869 vxor 19, 19, 27 870 vxor 20, 20, 28 871 vxor 21, 21, 29 872 vxor 22, 22, 30 873 874 stxvb16x 47, 0, 9 # store output 875 stxvb16x 48, 15, 9 # store output 876 stxvb16x 49, 16, 9 # store output 877 stxvb16x 50, 17, 9 # store output 878 stxvb16x 51, 18, 9 # store output 879 stxvb16x 52, 19, 9 # store output 880 stxvb16x 53, 20, 9 # store output 881 stxvb16x 54, 21, 9 # store output 882 addi 9, 9, 128 883 884 vxor 15, 15, 0 885 PPC_GHASH4x 0, 15, 16, 17, 18 886 887 vxor 19, 19, 0 888 PPC_GHASH4x 0, 19, 20, 21, 22 889 890 xxlor 30+32, 9, 9 # last ctr 891 vadduwm 30, 30, 31 # increase ctr 892 stxvb16x 32+30, 0, 7 # update IV 893 stxvb16x 32+0, 0, 8 # update Xi 894 895 addi 5, 5, -128 896 addi 11, 11, 128 897 898 # 899 # Done 8x blocks 900 # 901 902 cmpdi 5, 0 903 beq aes_gcm_out 904 905__Process_more_enc: 906 li 24, 1 # encrypt 907 bl aes_gcm_crypt_1x 908 cmpdi 5, 0 909 beq aes_gcm_out 910 911 bl __Process_partial 912 cmpdi 5, 0 913 beq aes_gcm_out 914__Do_combine_enc: 915 bl __Combine_partial 916 cmpdi 5, 0 917 bgt __Process_encrypt 918 b aes_gcm_out 919 920SYM_FUNC_END(aes_p10_gcm_encrypt) 921 922################################################################################ 923# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len, 924# const char *rk, unsigned char iv[16], void *Xip); 925# 8x Decrypt 926# 927################################################################################ 928SYM_FUNC_START(aes_p10_gcm_decrypt) 929 930 cmpdi 5, 0 931 ble __Invalid_msg_len 932 933 SAVE_REGS 934 LOAD_HASH_TABLE 935 936 # initialize ICB: GHASH( IV ), IV - r7 937 lxvb16x 30+32, 0, 7 # load IV - v30 938 939 mr 14, 3 940 mr 9, 4 941 942 # counter 1 943 vxor 31, 31, 31 944 vspltisb 22, 1 945 vsldoi 31, 31, 22,1 # counter 1 946 947 addis 11, 2, permx@toc@ha 948 addi 11, 11, permx@toc@l 949 lxv 10, 0(11) # vs10: vpermxor vector 950 li 11, 0 951 952 # load 9 round keys to VSR 953 lxv 0, 0(6) # round key 0 954 lxv 1, 16(6) # round key 1 955 lxv 2, 32(6) # round key 2 956 lxv 3, 48(6) # round key 3 957 lxv 4, 64(6) # round key 4 958 lxv 5, 80(6) # round key 5 959 lxv 6, 96(6) # round key 6 960 lxv 7, 112(6) # round key 7 961 lxv 8, 128(6) # round key 8 962 963 # load rounds - 10 (128), 12 (192), 14 (256) 964 lwz 23, 240(6) # n rounds 965 li 24, 0 # decrypt 966 967__Process_decrypt: 968 # 969 # Process different blocks 970 # 971 ld 12, 56(7) 972 cmpdi 12, 0 973 bgt __Do_combine_dec 974 cmpdi 5, 128 975 blt __Process_more_dec 976 977# 978# Process 8x AES/GCM blocks 979# 980__Process_8x_dec: 981 # 8x blcoks 982 li 10, 128 983 divdu 12, 5, 10 # n 128 bytes-blocks 984 985 addi 12, 12, -1 # loop - 1 986 987 vmr 15, 30 # first state: IV 988 vadduwm 16, 15, 31 # state + counter 989 vadduwm 17, 16, 31 990 vadduwm 18, 17, 31 991 vadduwm 19, 18, 31 992 vadduwm 20, 19, 31 993 vadduwm 21, 20, 31 994 vadduwm 22, 21, 31 995 xxlor 9, 32+22, 32+22 # save last state 996 997 # vxor state, state, w # addroundkey 998 xxlor 32+29, 0, 0 999 vxor 15, 15, 29 # IV + round key - add round key 0 1000 vxor 16, 16, 29 1001 vxor 17, 17, 29 1002 vxor 18, 18, 29 1003 vxor 19, 19, 29 1004 vxor 20, 20, 29 1005 vxor 21, 21, 29 1006 vxor 22, 22, 29 1007 1008 li 15, 16 1009 li 16, 32 1010 li 17, 48 1011 li 18, 64 1012 li 19, 80 1013 li 20, 96 1014 li 21, 112 1015 1016 # 1017 # Pre-compute first 8 AES state and leave 1/3/5 more rounds 1018 # for the loop. 1019 # 1020 addi 22, 23, -9 # process 8 keys 1021 mtctr 22 # AES key loop 1022 addi 10, 6, 144 1023 1024 LOOP_8AES_STATE # process 8 AES keys 1025 1026__PreLoop_aes_state_dec: 1027 lxv 32+1, 0(10) # round key 1028 AES_CIPHER_8x vcipher 15 1 1029 addi 10, 10, 16 1030 bdnz __PreLoop_aes_state_dec 1031 lxv 32+1, 0(10) # last round key (v1) 1032 1033 cmpdi 12, 0 # Only one loop (8 block) 1034 beq __Finish_ghash_dec 1035 1036# 1037# Loop 8x blocks and compute ghash 1038# 1039__Loop_8x_block_dec: 1040 vcipherlast 15, 15, 1 1041 vcipherlast 16, 16, 1 1042 vcipherlast 17, 17, 1 1043 vcipherlast 18, 18, 1 1044 vcipherlast 19, 19, 1 1045 vcipherlast 20, 20, 1 1046 vcipherlast 21, 21, 1 1047 vcipherlast 22, 22, 1 1048 1049 lxvb16x 32+23, 0, 14 # load block 1050 lxvb16x 32+24, 15, 14 # load block 1051 lxvb16x 32+25, 16, 14 # load block 1052 lxvb16x 32+26, 17, 14 # load block 1053 lxvb16x 32+27, 18, 14 # load block 1054 lxvb16x 32+28, 19, 14 # load block 1055 lxvb16x 32+29, 20, 14 # load block 1056 lxvb16x 32+30, 21, 14 # load block 1057 addi 14, 14, 128 1058 1059 vxor 15, 15, 23 1060 vxor 16, 16, 24 1061 vxor 17, 17, 25 1062 vxor 18, 18, 26 1063 vxor 19, 19, 27 1064 vxor 20, 20, 28 1065 vxor 21, 21, 29 1066 vxor 22, 22, 30 1067 1068 stxvb16x 47, 0, 9 # store output 1069 stxvb16x 48, 15, 9 # store output 1070 stxvb16x 49, 16, 9 # store output 1071 stxvb16x 50, 17, 9 # store output 1072 stxvb16x 51, 18, 9 # store output 1073 stxvb16x 52, 19, 9 # store output 1074 stxvb16x 53, 20, 9 # store output 1075 stxvb16x 54, 21, 9 # store output 1076 1077 addi 9, 9, 128 1078 1079 vmr 15, 23 1080 vmr 16, 24 1081 vmr 17, 25 1082 vmr 18, 26 1083 vmr 19, 27 1084 vmr 20, 28 1085 vmr 21, 29 1086 vmr 22, 30 1087 1088 # ghash here 1089 vxor 15, 15, 0 1090 PPC_GHASH4x 0, 15, 16, 17, 18 1091 1092 vxor 19, 19, 0 1093 PPC_GHASH4x 0, 19, 20, 21, 22 1094 1095 xxlor 32+15, 9, 9 # last state 1096 vadduwm 15, 15, 31 # state + counter 1097 vadduwm 16, 15, 31 1098 vadduwm 17, 16, 31 1099 vadduwm 18, 17, 31 1100 vadduwm 19, 18, 31 1101 vadduwm 20, 19, 31 1102 vadduwm 21, 20, 31 1103 vadduwm 22, 21, 31 1104 xxlor 9, 32+22, 32+22 # save last state 1105 1106 xxlor 32+27, 0, 0 # restore roundkey 0 1107 vxor 15, 15, 27 # IV + round key - add round key 0 1108 vxor 16, 16, 27 1109 vxor 17, 17, 27 1110 vxor 18, 18, 27 1111 vxor 19, 19, 27 1112 vxor 20, 20, 27 1113 vxor 21, 21, 27 1114 vxor 22, 22, 27 1115 1116 addi 5, 5, -128 1117 addi 11, 11, 128 1118 1119 LOOP_8AES_STATE # process 8 AES keys 1120 mtctr 22 # AES key loop 1121 addi 10, 6, 144 1122__LastLoop_aes_state_dec: 1123 lxv 32+1, 0(10) # round key 1124 AES_CIPHER_8x vcipher 15 1 1125 addi 10, 10, 16 1126 bdnz __LastLoop_aes_state_dec 1127 lxv 32+1, 0(10) # last round key (v1) 1128 1129 addi 12, 12, -1 1130 cmpdi 12, 0 1131 bne __Loop_8x_block_dec 1132 1133__Finish_ghash_dec: 1134 vcipherlast 15, 15, 1 1135 vcipherlast 16, 16, 1 1136 vcipherlast 17, 17, 1 1137 vcipherlast 18, 18, 1 1138 vcipherlast 19, 19, 1 1139 vcipherlast 20, 20, 1 1140 vcipherlast 21, 21, 1 1141 vcipherlast 22, 22, 1 1142 1143 lxvb16x 32+23, 0, 14 # load block 1144 lxvb16x 32+24, 15, 14 # load block 1145 lxvb16x 32+25, 16, 14 # load block 1146 lxvb16x 32+26, 17, 14 # load block 1147 lxvb16x 32+27, 18, 14 # load block 1148 lxvb16x 32+28, 19, 14 # load block 1149 lxvb16x 32+29, 20, 14 # load block 1150 lxvb16x 32+30, 21, 14 # load block 1151 addi 14, 14, 128 1152 1153 vxor 15, 15, 23 1154 vxor 16, 16, 24 1155 vxor 17, 17, 25 1156 vxor 18, 18, 26 1157 vxor 19, 19, 27 1158 vxor 20, 20, 28 1159 vxor 21, 21, 29 1160 vxor 22, 22, 30 1161 1162 stxvb16x 47, 0, 9 # store output 1163 stxvb16x 48, 15, 9 # store output 1164 stxvb16x 49, 16, 9 # store output 1165 stxvb16x 50, 17, 9 # store output 1166 stxvb16x 51, 18, 9 # store output 1167 stxvb16x 52, 19, 9 # store output 1168 stxvb16x 53, 20, 9 # store output 1169 stxvb16x 54, 21, 9 # store output 1170 addi 9, 9, 128 1171 1172 #vmr 15, 23 1173 vxor 15, 23, 0 1174 vmr 16, 24 1175 vmr 17, 25 1176 vmr 18, 26 1177 vmr 19, 27 1178 vmr 20, 28 1179 vmr 21, 29 1180 vmr 22, 30 1181 1182 #vxor 15, 15, 0 1183 PPC_GHASH4x 0, 15, 16, 17, 18 1184 1185 vxor 19, 19, 0 1186 PPC_GHASH4x 0, 19, 20, 21, 22 1187 1188 xxlor 30+32, 9, 9 # last ctr 1189 vadduwm 30, 30, 31 # increase ctr 1190 stxvb16x 32+30, 0, 7 # update IV 1191 stxvb16x 32+0, 0, 8 # update Xi 1192 1193 addi 5, 5, -128 1194 addi 11, 11, 128 1195 1196 # 1197 # Done 8x blocks 1198 # 1199 1200 cmpdi 5, 0 1201 beq aes_gcm_out 1202 1203__Process_more_dec: 1204 li 24, 0 # decrypt 1205 bl aes_gcm_crypt_1x 1206 cmpdi 5, 0 1207 beq aes_gcm_out 1208 1209 bl __Process_partial 1210 cmpdi 5, 0 1211 beq aes_gcm_out 1212__Do_combine_dec: 1213 bl __Combine_partial 1214 cmpdi 5, 0 1215 bgt __Process_decrypt 1216 b aes_gcm_out 1217SYM_FUNC_END(aes_p10_gcm_decrypt) 1218 1219SYM_FUNC_START_LOCAL(aes_gcm_out) 1220 1221 mr 3, 11 # return count 1222 1223 RESTORE_REGS 1224 blr 1225 1226__Invalid_msg_len: 1227 li 3, 0 1228 blr 1229SYM_FUNC_END(aes_gcm_out) 1230 1231SYM_DATA_START_LOCAL(PERMX) 1232.align 4 1233# for vector permute and xor 1234permx: 1235.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3 1236SYM_DATA_END(permx) 1237