/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	x2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[], u8 const final_iv[]);
	 */
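	/*
	 * Argument registers follow the AAPCS64 procedure call standard; the
	 * per-argument notes below summarize how the code above uses them:
	 *
	 *	x0: out       - output buffer
	 *	x1: in        - input buffer
	 *	w2: cbytes    - number of bytes to process; a trailing partial
	 *			block is handled by ce_aes_ccm_crypt_tail
	 *	x3: rk        - AES encryption round key schedule
	 *	w4: rounds    - number of AES rounds (10, 12 or 14)
	 *	x5: mac       - 16-byte CBC-MAC state, updated in place
	 *	x6: ctr       - 16-byte counter block; the low 64 bits are
	 *			incremented per block and written back
	 *	x7: final_iv  - counter block used to encrypt the final MAC,
	 *			or NULL to skip the final MAC encryption
	 */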
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff
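
	/*
	 * The 16 identity bytes at .Lpermute are padded with 15 bytes of 0xff
	 * on either side: ce_aes_ccm_crypt_tail loads its tbl/tbx index
	 * vectors from addresses offset from .Lpermute by the (negative)
	 * remaining byte count in w2.  The out-of-range 0xff indices make tbl
	 * produce zero bytes and leave the corresponding tbx destination bytes
	 * untouched, which shifts the keystream, the previous output block and
	 * the plaintext into place when the final block of input is shorter
	 * than 16 bytes.
	 */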