/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Copyright (c) 2010, Intel Corporation.
 *
 * Ported x86_64 version to x86:
 *    Author: Mathias Krause <minipli@googlemail.com>
 *
 * Syntax: GNU as, AT&T operand order (source, destination), preprocessed
 * by cpp.
 *
 * Layout of the key context as used by this file:
 *   offset   0: encryption round keys (written by aesni_set_key)
 *   offset 240: decryption round keys (written by aesni_set_key)
 *   offset 480: key length in bytes   (written by aesni_set_key)
 *
 * The bulk ECB/CBC/CTR loops treat LEN as an unsigned byte count and
 * therefore use unsigned condition codes (jb/jae) for all length tests.
 */

#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/frame.h>

#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define GF128MUL_MASK %xmm7

#ifdef __x86_64__
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif

/*
 * _key_expansion_256a / _key_expansion_128: internal ABI
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	aeskeygenassist result
 *	%xmm4:	scratch, zeroed by aesni_set_key before the first call
 *	TKEYP:	where to store the new round key
 * output:
 *	%xmm0:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd $0b11111111, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)

/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0, %xmm2:	current 192-bit key state
 *	%xmm1:		aeskeygenassist result
 *	%xmm4:		scratch, zeroed by aesni_set_key before the first call
 *	TKEYP:		where to store the output
 * output:
 *	32 bytes stored at (TKEYP) and 0x10(TKEYP); TKEYP advanced by 32
 * changed:
 *	%xmm0 - %xmm6
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (TKEYP)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 0x10(TKEYP)
	add $0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)

/*
 * _key_expansion_192b: internal ABI
 * Same register contract as _key_expansion_192a, but stores only %xmm0
 * (16 bytes) at (TKEYP) and advances TKEYP by 16.
 * changed:
 *	%xmm0 - %xmm5
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)

/*
 * _key_expansion_256b: internal ABI
 * input:
 *	%xmm2:	round key to be updated
 *	%xmm1:	aeskeygenassist result
 *	%xmm4:	scratch, zeroed by aesni_set_key before the first call
 *	TKEYP:	where to store the new round key
 * output:
 *	%xmm2:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd $0b10101010, %xmm1, %xmm1
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)

/*
 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                    unsigned int key_len)
 *
 * Writes the encryption round keys starting at offset 0 of ctx, stores
 * key_len at offset 480, then fills the decryption schedule at offset 240:
 * the first and last round keys are swapped as-is, the round keys in
 * between are passed through aesimc and stored in reverse order.
 *
 * key_len < 24 takes the 128-bit path, == 24 the 192-bit path, anything
 * larger the 256-bit path (only the low byte of key_len is compared).
 */
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
#endif
	movups (UKEYP), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (KEYP)
	lea 0x10(KEYP), TKEYP		# key addr
	movl %edx, 480(KEYP)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
	movups 0x10(UKEYP), %xmm2	# other user key
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(UKEYP), %xmm2		# other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
	call _key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
	call _key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
	call _key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
	call _key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
	call _key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
	call _key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
	call _key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
	call _key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
	call _key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
	call _key_expansion_128
.Ldec_key:
	sub $0x10, TKEYP		# TKEYP = last encryption round key
	movaps (KEYP), %xmm0
	movaps (TKEYP), %xmm1
	movaps %xmm0, 240(TKEYP)	# first enc key -> last dec key
	movaps %xmm1, 240(KEYP)		# last enc key -> first dec key
	add $0x10, KEYP
	lea 240-16(TKEYP), UKEYP	# dec keys are filled downwards
.align 4
.Ldec_key_loop:
	movaps (KEYP), %xmm0
	aesimc %xmm0, %xmm1
	movaps %xmm1, (UKEYP)
	add $0x10, KEYP
	sub $0x10, UKEYP
	cmp TKEYP, KEYP
	jb .Ldec_key_loop
#ifndef __x86_64__
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_set_key)

/*
 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst.
 */
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_enc)

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .Lenc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps (TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE
	RET
SYM_FUNC_END(_aesni_enc1)

/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps (TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE1		# last round
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_enc4)

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .Ldec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps (TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE
	RET
SYM_FUNC_END(_aesni_dec1)

/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .L4dec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps (TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE1		# last round
	aesdeclast KEY, STATE2
	aesdeclast KEY, STATE3
	aesdeclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_dec4)

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * Encrypts floor(len / 16) blocks from src to dst, four at a time while at
 * least 64 bytes remain. A trailing partial block is left untouched.
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN		# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned: LEN is a byte count
.Lecb_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_enc)

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * Decrypts floor(len / 16) blocks from src to dst using the decryption
 * round keys at offset 240 of ctx.
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption keys
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned: LEN is a byte count
.Lecb_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_dec)

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts floor(len / 16) blocks. If at least one block was
 * processed, the last ciphertext block is written back to iv.
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop		# unsigned: LEN is a byte count
	movups STATE, (IVP)
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_enc)

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts floor(len / 16) blocks, four at a time while at least 64
 * bytes remain. If at least one block was processed, the last ciphertext
 * block is written back to iv.
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
#ifdef __x86_64__
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
#else
	/* only %xmm0-%xmm7 exist here: reuse IN1/IN2 and reload them later */
	movups 0x20(INP), IN1
	movaps IN1, STATE3
	movups 0x30(INP), IN2
	movaps IN2, STATE4
#endif
	call _aesni_dec4
	pxor IV, STATE1
#ifdef __x86_64__
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV
#else
	pxor IN1, STATE4
	movaps IN2, IV
	movups (INP), IN1
	pxor IN1, STATE2
	movups 0x10(INP), IN2
	pxor IN2, STATE3
#endif
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned: LEN is a byte count
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_dec)

/*
 * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Encrypts the final two blocks of a CBC ciphertext-stealing message:
 * one full 16-byte block at src followed by the last (len - 16) bytes.
 * No length check is done here; iv is read but not written back.
 */
SYM_FUNC_START(aesni_cts_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	movups (IVP), STATE
	sub $16, LEN			# LEN = bytes in the final block
	mov T1, IVP			# IVP is reused as a table pointer
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	movups (T1), %xmm4
	movups (IVP), %xmm5

	movups (INP), IN1
	add LEN, INP
	movups (INP), IN2

	pxor IN1, STATE
	call _aesni_enc1

	pshufb %xmm5, IN2
	pxor STATE, IN2
	pshufb %xmm4, STATE
	add OUTP, LEN			# LEN = dst + size of final block
	movups STATE, (LEN)

	movaps IN2, STATE
	call _aesni_enc1
	movups STATE, (OUTP)

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_enc)

/*
 * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Decrypts the final two blocks of a CBC ciphertext-stealing message,
 * mirroring aesni_cts_cbc_enc. No length check is done here; iv is read
 * but not written back.
 */
SYM_FUNC_START(aesni_cts_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption keys
	movups (IVP), IV
	sub $16, LEN			# LEN = bytes in the final block
	mov T1, IVP			# IVP is reused as a table pointer
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	movups (T1), %xmm4

	movups (INP), STATE
	add LEN, INP
	movups (INP), IN1

	call _aesni_dec1
	movaps STATE, IN2
	pshufb %xmm4, STATE
	pxor IN1, STATE

	add OUTP, LEN			# LEN = dst + size of final block
	movups STATE, (LEN)

	movups (IVP), %xmm0		# pblendvb uses %xmm0 as implicit mask
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE
	call _aesni_dec1

	pxor IV, STATE
	movups STATE, (OUTP)

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_dec)

.pushsection .rodata
.align 16
.Lcts_permute_table:
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
#ifdef __x86_64__
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#endif
.popsection

#ifdef __x86_64__
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 */
SYM_FUNC_START_LOCAL(_aesni_inc_init)
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	pshufb BSWAP_MASK, CTR
	mov $1, TCTR_LOW
	movq TCTR_LOW, INC
	movq CTR, TCTR_LOW
	RET
SYM_FUNC_END(_aesni_inc_init)

/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 * output:
 *	IV:	Increase by 1
 * changed:
 *	CTR:	== output IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 */
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq INC, CTR
	add $1, TCTR_LOW
	jnc .Linc_low
	pslldq $8, INC			# carry out of the low qword:
	paddq INC, CTR			# add 1 to the high qword as well
	psrldq $8, INC
.Linc_low:
	movaps CTR, IV
	pshufb BSWAP_MASK, IV
	RET
SYM_FUNC_END(_aesni_inc)

/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR-mode en/decrypts floor(len / 16) blocks (x86_64 only). If at least
 * one block was processed, the advanced counter is written back to iv.
 */
SYM_FUNC_START(aesni_ctr_enc)
	ANNOTATE_NOENDBR
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		# unsigned: LEN is a byte count
.Lctr_enc_ret:
	movups IV, (IVP)
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)

#endif

.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous

/*
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 */
.macro _aesni_gf128mul_x_ble
	pshufd $0x13, IV, KEY
	paddq IV, IV
	psrad $31, KEY
	pand GF128MUL_MASK, KEY
	pxor KEY, IV
.endm

/*
 * _aesni_xts_crypt: body shared by aesni_xts_enc (\enc == 1) and
 * aesni_xts_dec (\enc == 0).
 *
 * LEN is deliberately driven negative in this macro ("sub ...; jl ..."),
 * so the signed condition codes used below are intentional.
 *
 * GF128MUL_MASK shares %xmm7 with IN2; IN2 is only written on the
 * ciphertext-stealing tail, after the last tweak multiplication.
 */
.macro	_aesni_xts_crypt	enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN
.if !\enc
	add $240, KEYP

	test $15, LEN
	jz .Lxts_loop4\@
	sub $16, LEN
.endif

.Lxts_loop4\@:
	sub $64, LEN
	jl .Lxts_1x\@

	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)		# park the tweak in the output buffer

	_aesni_gf128mul_x_ble
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

.if \enc
	call _aesni_enc4
.else
	call _aesni_dec4
.endif

	movdqu 0x00(OUTP), IN		# reload the parked tweaks
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_loop4\@

.Lxts_ret_iv\@:
	movups IV, (IVP)

.Lxts_ret\@:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_1x\@:
	add $64, LEN
	jz .Lxts_ret_iv\@
.if \enc
	sub $16, LEN
	jl .Lxts_cts4\@
.endif

.Lxts_loop1\@:
	movdqu (INP), STATE
.if \enc
	pxor IV, STATE
	call _aesni_enc1
.else
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
	pxor IV, STATE
	call _aesni_dec1
.endif
	pxor IV, STATE
	_aesni_gf128mul_x_ble

	test LEN, LEN
	jz .Lxts_out\@

.if \enc
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
.endif

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_loop1\@

.Lxts_out\@:
	movdqu STATE, (OUTP)
	jmp .Lxts_ret_iv\@

.if \enc
.Lxts_cts4\@:
	movdqa STATE4, STATE
	sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
.endif
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0		# pblendvb uses %xmm0 as implicit mask
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

.if \enc
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
.else
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE
.endif

	movups STATE, (OUTP)
	jmp .Lxts_ret\@
.endm

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt	1
SYM_FUNC_END(aesni_xts_enc)

/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_dec)
	_aesni_xts_crypt	0
SYM_FUNC_END(aesni_xts_dec)