/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE4.1 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 * Copyright 2024 Google LLC
 */

#include <linux/linkage.h>

/*
 * Register roles.  KEY and MSG deliberately name the same register: KEY is
 * only used by aegis128_aesni_init, MSG by everything else.  T0 and T1 are
 * scratch; T0 is overwritten by every aegis128_update.
 */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define KEY	%xmm5
#define MSG	%xmm5
#define T0	%xmm6
#define T1	%xmm7

/*
 * The two 16-byte initialization constants.  Read back to back, the 32 bytes
 * are the Fibonacci sequence reduced modulo 256.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Sixteen 0xff bytes followed by sixteen zero bytes.  A 16-byte load from
 * .Lzeropad_mask + 16 - n yields a mask whose first n bytes are 0xff and
 * whose remaining bytes are zero.
 */
.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
.align 32
.Lzeropad_mask:
	.octa 0xffffffffffffffffffffffffffffffff
	.octa 0

.text

/*
 * aegis128_update - one state update, without the message XOR
 *
 * in:       STATE[0-4] hold the current state
 * out:      STATE[0-4] hold the new state, but rotated by one register: the
 *           new first state word is left in STATE4, the new second word in
 *           STATE0, and so on.  Five updates in a row bring the register
 *           assignment back to where it started.
 * clobbers: T0
 */
.macro aegis128_update
	movdqa STATE4, T0		/* keep the old STATE4 for the last round */
	aesenc STATE0, STATE4
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc T0, STATE3
.endm

/*
 * aegis128_update_xor - one state update followed by XORing \msg into \dst
 *
 * The caller chooses \dst; every user in this file passes the register that
 * holds the new first state word after the update.  Clobbers T0.
 */
.macro aegis128_update_xor msg, dst
	aegis128_update
	pxor \msg, \dst
.endm

/*
 * aegis128_load_state - read the five 16-byte state words at STATEP into
 * STATE0..STATE4.  Unaligned loads are used, so STATEP needs no particular
 * alignment.
 */
.macro aegis128_load_state
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4
.endm

/*
 * aegis128_store_state - write registers \w0..\w4, in that order, to the five
 * 16-byte state words at STATEP.  The argument order is how callers undo the
 * register rotation left behind by aegis128_update.
 */
.macro aegis128_store_state w0, w1, w2, w3, w4
	movdqu \w0, 0x00(STATEP)
	movdqu \w1, 0x10(STATEP)
	movdqu \w2, 0x20(STATEP)
	movdqu \w3, 0x30(STATEP)
	movdqu \w4, 0x40(STATEP)
.endm

/*
 * load_partial - read 1 <= LEN (%ecx) <= 15 bytes at SRC into MSG
 *
 * Bytes of MSG beyond LEN are zeroed.  No byte outside [SRC, SRC + LEN) is
 * read: longer lengths are covered by two overlapping loads that are then
 * shifted and merged.  Clobbers %rax, %rcx, and %r8.
 */
.macro load_partial
	sub $8, %ecx			/* %ecx = LEN - 8 */
	jle .Lload_le8_\@

	/* 9 <= LEN <= 15: low qword directly, high qword from the tail. */
	movq (SRC), MSG			/* bytes 0..7 */
	mov (SRC, %rcx), %rax		/* the 8 bytes ending at SRC + LEN */
	neg %ecx
	shl $3, %ecx			/* %cl = 64 - 8 * (LEN - 8), mod 64 */
	shr %cl, %rax			/* drop bytes already held in MSG */
	pinsrq $1, %rax, MSG
	jmp .Lload_done_\@

.Lload_le8_\@:
	add $4, %ecx			/* %ecx = LEN - 4 */
	jl .Lload_lt4_\@

	/* 4 <= LEN <= 8: two possibly overlapping dwords. */
	mov (SRC), %eax			/* bytes 0..3 */
	mov (SRC, %rcx), %r8d		/* the 4 bytes ending at SRC + LEN */
	jmp .Lload_merge_\@

.Lload_lt4_\@:
	/* 1 <= LEN <= 3: first byte, plus the last two bytes if LEN >= 2. */
	add $2, %ecx			/* %ecx = LEN - 2 */
	movzbl (SRC), %eax		/* byte 0; leaves the flags of add intact */
	jl .Lload_movq_\@		/* LEN == 1: nothing more to read */
	movzwl (SRC, %rcx), %r8d	/* the 2 bytes ending at SRC + LEN */
.Lload_merge_\@:
	shl $3, %ecx			/* byte offset of the tail -> bit offset */
	shl %cl, %r8
	or %r8, %rax			/* overlapping bytes are equal in both */
.Lload_movq_\@:
	movq %rax, MSG			/* also zeroes the upper qword of MSG */
.Lload_done_\@:
.endm

/*
 * store_partial - write the first 1 <= LEN (%ecx) <= 15 bytes of \msg to DST
 *
 * No byte outside [DST, DST + LEN) is written.  For LEN >= 4 the tail is
 * stored first, rotated so that its valid bytes land at the end of the
 * buffer; the head store that follows then overwrites the bytes of the tail
 * store that carried no useful data.  Clobbers %rax, %rcx, and %r8.
 */
.macro store_partial msg
	sub $8, %ecx			/* %ecx = LEN - 8 */
	jl .Lstore_lt8_\@

	/* 8 <= LEN <= 15 */
	pextrq $1, \msg, %rax		/* bytes 8..15 of \msg */
	mov %ecx, %r8d			/* keep LEN - 8 as the store offset */
	shl $3, %ecx
	ror %cl, %rax			/* move the LEN - 8 valid bytes to the top */
	mov %rax, (DST, %r8)		/* tail: ends exactly at DST + LEN */
	movq \msg, (DST)		/* head: bytes 0..7 */
	jmp .Lstore_done_\@

.Lstore_lt8_\@:
	add $4, %ecx			/* %ecx = LEN - 4 */
	jl .Lstore_lt4_\@

	/* 4 <= LEN <= 7 */
	pextrd $1, \msg, %eax		/* bytes 4..7 of \msg */
	mov %ecx, %r8d			/* keep LEN - 4 as the store offset */
	shl $3, %ecx
	ror %cl, %eax			/* move the LEN - 4 valid bytes to the top */
	mov %eax, (DST, %r8)		/* tail: ends exactly at DST + LEN */
	movd \msg, (DST)		/* head: bytes 0..3 */
	jmp .Lstore_done_\@

.Lstore_lt4_\@:
	/*
	 * 1 <= LEN <= 3: one byte at a time.  A single compare drives both
	 * branches below because pextrb does not modify the flags.
	 */
	pextrb $0, \msg, 0(DST)
	cmp $-2, %ecx			/* compare LEN - 4 with -2, i.e. LEN with 2 */
	jl .Lstore_done_\@		/* LEN == 1 */
	pextrb $1, \msg, 1(DST)
	je .Lstore_done_\@		/* LEN == 2 */
	pextrb $2, \msg, 2(DST)
.Lstore_done_\@:
.endm

/*
 * void aegis128_aesni_init(struct aegis_state *state,
 *			    const struct aegis_block *key,
 *			    const u8 iv[AEGIS128_NONCE_SIZE]);
 *
 * Builds the initial state from the key, the IV and the two constants, runs
 * ten updates, and writes the result to *state.  The key is read with an
 * aligned load (movdqa), so it must be 16-byte aligned; the IV and the state
 * are accessed with unaligned moves.
 */
SYM_FUNC_START(aegis128_aesni_init)
	.set STATEP, %rdi
	.set KEYP, %rsi
	.set IVP, %rdx

	/* T1 = IV */
	movdqu (IVP), T1

	/* KEY = key, T1 = key ^ IV */
	movdqa (KEYP), KEY
	pxor KEY, T1
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/* STATE2 = const_0, STATE1 = const_1, then fold them into 3 and 4 */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/*
	 * Ten updates, alternately absorbing KEY and KEY ^ IV.  The XOR target
	 * walks backwards through the registers to follow the rotation done
	 * by aegis128_update.
	 */
	aegis128_update_xor KEY, STATE4
	aegis128_update_xor T1, STATE3
	aegis128_update_xor KEY, STATE2
	aegis128_update_xor T1, STATE1
	aegis128_update_xor KEY, STATE0
	aegis128_update_xor T1, STATE4
	aegis128_update_xor KEY, STATE3
	aegis128_update_xor T1, STATE2
	aegis128_update_xor KEY, STATE1
	aegis128_update_xor T1, STATE0

	/* Ten is a multiple of five, so the registers are back in order. */
	aegis128_store_state STATE0, STATE1, STATE2, STATE3, STATE4
	RET
SYM_FUNC_END(aegis128_aesni_init)

/*
 * aegis128_ad_block - absorb one 16-byte block of associated data
 *
 * Loads block \i (counted from SRC), performs one update with the block XORed
 * into \dst, subtracts 16 from LEN and jumps to \done once LEN reaches zero.
 * Clobbers MSG and T0.
 */
.macro aegis128_ad_block dst, i, done
	movdqu (\i * 0x10)(SRC), MSG
	aegis128_update_xor MSG, \dst
	sub $0x10, LEN
	jz \done
.endm

/*
 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
 *			  unsigned int len);
 *
 * len must be a multiple of 16.  A len of zero returns immediately without
 * touching the state.
 */
SYM_FUNC_START(aegis128_aesni_ad)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set LEN, %edx

	test LEN, LEN
	jz .Lad_out

	aegis128_load_state

	/*
	 * Unrolled five times so that each pass ends with the registers back
	 * in their original order.  Each exit label stores the registers in
	 * the order that matches the number of updates done in the last pass.
	 */
.align 8
.Lad_loop:
	aegis128_ad_block STATE4, 0, .Lad_out_1
	aegis128_ad_block STATE3, 1, .Lad_out_2
	aegis128_ad_block STATE2, 2, .Lad_out_3
	aegis128_ad_block STATE1, 3, .Lad_out_4
	aegis128_ad_block STATE0, 4, .Lad_out_0

	add $0x50, SRC
	jmp .Lad_loop

	/* five updates in the last pass: */
.Lad_out_0:
	aegis128_store_state STATE0, STATE1, STATE2, STATE3, STATE4
	RET

	/* one update in the last pass: */
.Lad_out_1:
	aegis128_store_state STATE4, STATE0, STATE1, STATE2, STATE3
	RET

	/* two updates in the last pass: */
.Lad_out_2:
	aegis128_store_state STATE3, STATE4, STATE0, STATE1, STATE2
	RET

	/* three updates in the last pass: */
.Lad_out_3:
	aegis128_store_state STATE2, STATE3, STATE4, STATE0, STATE1
	RET

	/* four updates in the last pass: */
.Lad_out_4:
	aegis128_store_state STATE1, STATE2, STATE3, STATE4, STATE0
.Lad_out:
	RET
SYM_FUNC_END(aegis128_aesni_ad)

/*
 * encrypt_block - encrypt block \i (counted from SRC/DST) and absorb it
 *
 * \s0..\s4 name the registers that currently hold state words 0..4 (\s0 is
 * accepted for symmetry but not referenced).  The output block is
 * src ^ \s1 ^ \s4 ^ (\s2 & \s3); the plaintext in MSG is then absorbed by one
 * update.  Jumps to .Lenc_out_\i once LEN reaches zero.  Clobbers MSG, T0, T1.
 */
.macro encrypt_block s0, s1, s2, s3, s4, i
	movdqu (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, T0
	movdqu T0, (\i * 0x10)(DST)

	/* T0 has been stored, so the update is free to overwrite it. */
	aegis128_update_xor MSG, \s4

	sub $0x10, LEN
	jz .Lenc_out_\i
.endm

/*
 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * len must be nonzero and a multiple of 16.
 */
SYM_FUNC_START(aegis128_aesni_enc)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	aegis128_load_state

	/* Each line passes the registers as rotated by the previous update. */
.align 8
.Lenc_loop:
	encrypt_block STATE0, STATE1, STATE2, STATE3, STATE4, 0
	encrypt_block STATE4, STATE0, STATE1, STATE2, STATE3, 1
	encrypt_block STATE3, STATE4, STATE0, STATE1, STATE2, 2
	encrypt_block STATE2, STATE3, STATE4, STATE0, STATE1, 3
	encrypt_block STATE1, STATE2, STATE3, STATE4, STATE0, 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_loop

	/* .Lenc_out_N is reached after N + 1 updates in the last pass. */
.Lenc_out_0:
	aegis128_store_state STATE4, STATE0, STATE1, STATE2, STATE3
	RET

.Lenc_out_1:
	aegis128_store_state STATE3, STATE4, STATE0, STATE1, STATE2
	RET

.Lenc_out_2:
	aegis128_store_state STATE2, STATE3, STATE4, STATE0, STATE1
	RET

.Lenc_out_3:
	aegis128_store_state STATE1, STATE2, STATE3, STATE4, STATE0
	RET

.Lenc_out_4:
	aegis128_store_state STATE0, STATE1, STATE2, STATE3, STATE4
.Lenc_out:
	RET
SYM_FUNC_END(aegis128_aesni_enc)

/*
 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Encrypts one final partial block.  len is handed straight to load_partial
 * and store_partial, which require 1 <= len <= 15.
 */
SYM_FUNC_START(aegis128_aesni_enc_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	aegis128_load_state

	/* load_partial destroys %ecx, so park the length in %r9d. */
	mov LEN, %r9d
	load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	mov %r9d, LEN
	store_partial T0

	/* Absorb the zero-padded plaintext block. */
	aegis128_update_xor MSG, STATE4

	/* One update was done, so the registers are rotated by one. */
	aegis128_store_state STATE4, STATE0, STATE1, STATE2, STATE3
	RET
SYM_FUNC_END(aegis128_aesni_enc_tail)

/*
 * decrypt_block - decrypt block \i (counted from SRC/DST) and absorb it
 *
 * \s0..\s4 name the registers that currently hold state words 0..4 (\s0 is
 * accepted for symmetry but not referenced).  The output block is
 * src ^ \s1 ^ \s4 ^ (\s2 & \s3), computed in place in MSG, stored, and then
 * absorbed by one update.  Jumps to .Ldec_out_\i once LEN reaches zero.
 * Clobbers MSG, T0, T1.
 */
.macro decrypt_block s0, s1, s2, s3, s4, i
	movdqu (\i * 0x10)(SRC), MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, MSG
	movdqu MSG, (\i * 0x10)(DST)

	aegis128_update_xor MSG, \s4

	sub $0x10, LEN
	jz .Ldec_out_\i
.endm

/*
 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * len must be nonzero and a multiple of 16.
 */
SYM_FUNC_START(aegis128_aesni_dec)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	aegis128_load_state

	/* Each line passes the registers as rotated by the previous update. */
.align 8
.Ldec_loop:
	decrypt_block STATE0, STATE1, STATE2, STATE3, STATE4, 0
	decrypt_block STATE4, STATE0, STATE1, STATE2, STATE3, 1
	decrypt_block STATE3, STATE4, STATE0, STATE1, STATE2, 2
	decrypt_block STATE2, STATE3, STATE4, STATE0, STATE1, 3
	decrypt_block STATE1, STATE2, STATE3, STATE4, STATE0, 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_loop

	/* .Ldec_out_N is reached after N + 1 updates in the last pass. */
.Ldec_out_0:
	aegis128_store_state STATE4, STATE0, STATE1, STATE2, STATE3
	RET

.Ldec_out_1:
	aegis128_store_state STATE3, STATE4, STATE0, STATE1, STATE2
	RET

.Ldec_out_2:
	aegis128_store_state STATE2, STATE3, STATE4, STATE0, STATE1
	RET

.Ldec_out_3:
	aegis128_store_state STATE1, STATE2, STATE3, STATE4, STATE0
	RET

.Ldec_out_4:
	aegis128_store_state STATE0, STATE1, STATE2, STATE3, STATE4
.Ldec_out:
	RET
SYM_FUNC_END(aegis128_aesni_dec)

/*
 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Decrypts one final partial block.  len is handed straight to load_partial
 * and store_partial, which require 1 <= len <= 15.
 */
SYM_FUNC_START(aegis128_aesni_dec_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	aegis128_load_state

	/*
	 * load_partial destroys %ecx, so park the length in %r9d.  Writing
	 * %r9d also clears the upper half of %r9, which the 64-bit
	 * subtraction further down depends on.
	 */
	mov LEN, %r9d
	load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	mov %r9d, LEN
	store_partial MSG

	/*
	 * The bytes of MSG past len were zero on input and now hold the XOR
	 * mask.  Clear them again so that the block absorbed below is
	 * zero-padded: load len 0xff bytes followed by zeroes and AND.
	 */
	lea .Lzeropad_mask+16(%rip), %rax
	sub %r9, %rax
	movdqu (%rax), T0
	pand T0, MSG

	aegis128_update_xor MSG, STATE4

	/* One update was done, so the registers are rotated by one. */
	aegis128_store_state STATE4, STATE0, STATE1, STATE2, STATE3
	RET
SYM_FUNC_END(aegis128_aesni_dec_tail)

/*
 * void aegis128_aesni_final(struct aegis_state *state,
 *			     struct aegis_block *tag_xor,
 *			     unsigned int assoclen, unsigned int cryptlen);
 *
 * Absorbs the length block seven times and XORs all five state words into
 * *tag_xor.  *state itself is read but not written back.
 */
SYM_FUNC_START(aegis128_aesni_final)
	.set STATEP, %rdi
	.set TAG_XOR, %rsi
	.set ASSOCLEN, %edx
	.set CRYPTLEN, %ecx

	aegis128_load_state

	/*
	 * Length block: assoclen in the low qword, cryptlen in the high
	 * qword, both zero-extended from 32 bits and converted from bytes
	 * to bits.
	 */
	movd ASSOCLEN, MSG		/* also zeroes the rest of MSG */
	pinsrd $2, CRYPTLEN, MSG
	psllq $3, MSG			/* multiply by 8 (to get bit count) */

	pxor STATE3, MSG

	/* seven updates with the same block: */
	aegis128_update_xor MSG, STATE4
	aegis128_update_xor MSG, STATE3
	aegis128_update_xor MSG, STATE2
	aegis128_update_xor MSG, STATE1
	aegis128_update_xor MSG, STATE0
	aegis128_update_xor MSG, STATE4
	aegis128_update_xor MSG, STATE3

	/*
	 * All five words are XORed together, so the register rotation left
	 * by the updates does not matter here.
	 */
	movdqu (TAG_XOR), MSG

	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG

	movdqu MSG, (TAG_XOR)
	RET
SYM_FUNC_END(aegis128_aesni_final)