/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled.  Callers must enter a
 * floating-point context before calling any of these functions.
 */

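/*
 * A minimal caller sketch (an illustration, assuming the standard
 * fpu_kern(9) interface rather than a pattern prescribed by this file):
 *
 *      fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
 *      armv8_aes_encrypt_cbc(key, len, fromc, toc, iv);
 *      fpu_kern_leave(curthread, NULL);
 */
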
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

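/*
 * One AESE instruction performs AddRoundKey, SubBytes and ShiftRows, and
 * AESMC performs MixColumns, so each iteration of the loops below executes
 * two full AES rounds.  The final round omits MixColumns and instead XORs
 * in the last round key, hence the unrolled tail.
 */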
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
        uint8x16_t tmp;
        int i;

        tmp = from;
        for (i = 0; i < rounds - 1; i += 2) {
                tmp = vaeseq_u8(tmp, keysched[i]);
                tmp = vaesmcq_u8(tmp);
                tmp = vaeseq_u8(tmp, keysched[i + 1]);
                tmp = vaesmcq_u8(tmp);
        }

        tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
        tmp = vaesmcq_u8(tmp);
        tmp = vaeseq_u8(tmp, keysched[rounds]);
        tmp = veorq_u8(tmp, keysched[rounds + 1]);

        return (tmp);
}

static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
        uint8x16_t tmp;
        int i;

        tmp = from;
        for (i = 0; i < rounds - 1; i += 2) {
                tmp = vaesdq_u8(tmp, keysched[i]);
                tmp = vaesimcq_u8(tmp);
                tmp = vaesdq_u8(tmp, keysched[i + 1]);
                tmp = vaesimcq_u8(tmp);
        }

        tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
        tmp = vaesimcq_u8(tmp);
        tmp = vaesdq_u8(tmp, keysched[rounds]);
        tmp = veorq_u8(tmp, keysched[rounds + 1]);

        return (tmp);
}

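/*
 * The cursor-based loops below walk the source and destination buffers in
 * matching contiguous segments.  Whole blocks within a segment are handled
 * in place; a block that straddles a segment boundary is bounced through a
 * small on-stack buffer instead.
 */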
void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
        uint8x16_t tot, ivreg, tmp;
        uint8_t block[AES_BLOCK_LEN], *from, *to;
        size_t fromseglen, oseglen, seglen, toseglen;

        KASSERT(len % AES_BLOCK_LEN == 0,
            ("%s: length %zu not a multiple of the block size", __func__, len));

        ivreg = vld1q_u8(iv);
        for (; len > 0; len -= seglen) {
                from = crypto_cursor_segment(fromc, &fromseglen);
                to = crypto_cursor_segment(toc, &toseglen);

                seglen = ulmin(len, ulmin(fromseglen, toseglen));
                if (seglen < AES_BLOCK_LEN) {
                        crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
                        tmp = vld1q_u8(block);
                        tot = armv8_aes_enc(key->aes_rounds - 1,
                            (const void *)key->aes_key, veorq_u8(tmp, ivreg));
                        ivreg = tot;
                        vst1q_u8(block, tot);
                        crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
                        seglen = AES_BLOCK_LEN;
                } else {
                        for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
                            seglen -= AES_BLOCK_LEN) {
                                tmp = vld1q_u8(from);
                                tot = armv8_aes_enc(key->aes_rounds - 1,
                                    (const void *)key->aes_key,
                                    veorq_u8(tmp, ivreg));
                                ivreg = tot;
                                vst1q_u8(to, tot);
                                from += AES_BLOCK_LEN;
                                to += AES_BLOCK_LEN;
                        }
                        seglen = oseglen - seglen;
                        crypto_cursor_advance(fromc, seglen);
                        crypto_cursor_advance(toc, seglen);
                }
        }

        explicit_bzero(block, sizeof(block));
}

void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
        uint8x16_t ivreg, nextiv, tmp;
        uint8_t block[AES_BLOCK_LEN], *from, *to;
        size_t fromseglen, oseglen, seglen, toseglen;

        KASSERT(len % AES_BLOCK_LEN == 0,
            ("%s: length %zu not a multiple of the block size", __func__, len));

        ivreg = vld1q_u8(iv);
        for (; len > 0; len -= seglen) {
                from = crypto_cursor_segment(fromc, &fromseglen);
                to = crypto_cursor_segment(toc, &toseglen);

                seglen = ulmin(len, ulmin(fromseglen, toseglen));
                if (seglen < AES_BLOCK_LEN) {
                        crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
                        nextiv = vld1q_u8(block);
                        tmp = armv8_aes_dec(key->aes_rounds - 1,
                            (const void *)key->aes_key, nextiv);
                        vst1q_u8(block, veorq_u8(tmp, ivreg));
                        ivreg = nextiv;
                        crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
                        seglen = AES_BLOCK_LEN;
                } else {
                        for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
                            seglen -= AES_BLOCK_LEN) {
                                nextiv = vld1q_u8(from);
                                tmp = armv8_aes_dec(key->aes_rounds - 1,
                                    (const void *)key->aes_key, nextiv);
                                vst1q_u8(to, veorq_u8(tmp, ivreg));
                                ivreg = nextiv;
                                from += AES_BLOCK_LEN;
                                to += AES_BLOCK_LEN;
                        }
                        crypto_cursor_advance(fromc, oseglen - seglen);
                        crypto_cursor_advance(toc, oseglen - seglen);
                        seglen = oseglen - seglen;
                }
        }

        explicit_bzero(block, sizeof(block));
}

#define AES_XTS_BLOCKSIZE       16
#define AES_XTS_IVSIZE          8
#define AES_XTS_ALPHA           0x87    /* GF(2^128) generator polynomial */

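/*
 * Advance the XTS tweak to the next block: multiply it by x in GF(2^128),
 * shifting the 128-bit value left by one bit and folding any carry out of
 * the top bit back into the low byte with the AES_XTS_ALPHA reduction
 * constant.  The NEON routine below derives the per-lane carries from the
 * sign bits of a rotated copy of the tweak.
 */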
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
        const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
        int32x4_t xtweak, ret;

        /* set up xor mask */
        xtweak = vextq_s32(inp, inp, 3);
        xtweak = vshrq_n_s32(xtweak, 31);
        xtweak &= alphamask;

        /* next term */
        ret = vshlq_n_s32(inp, 1);
        ret ^= xtweak;

        return (ret);
}

static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
        uint8x16_t block;

        block = vld1q_u8(from) ^ *tweak;

        if (do_encrypt)
                block = armv8_aes_enc(rounds - 1, key_schedule, block);
        else
                block = armv8_aes_dec(rounds - 1, key_schedule, block);

        vst1q_u8(to, block ^ *tweak);

        *tweak = vreinterpretq_u8_s32(
            xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
        uint8x16_t tweakreg;
        uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
        uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
        uint8_t *from, *to;
        size_t fromseglen, oseglen, seglen, toseglen;

        KASSERT(len % AES_XTS_BLOCKSIZE == 0,
            ("%s: length %zu not a multiple of the block size", __func__, len));

        /*
         * Prepare tweak as E_k2(IV). IV is specified as LE representation
         * of a 64-bit block number which we allow to be passed in directly.
         */
#if BYTE_ORDER == LITTLE_ENDIAN
        bcopy(iv, tweak, AES_XTS_IVSIZE);
        /* Last 64 bits of IV are always zero. */
        bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
        tweakreg = vld1q_u8(tweak);
        tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

        for (; len > 0; len -= seglen) {
                from = crypto_cursor_segment(fromc, &fromseglen);
                to = crypto_cursor_segment(toc, &toseglen);

                seglen = ulmin(len, ulmin(fromseglen, toseglen));
                if (seglen < AES_XTS_BLOCKSIZE) {
                        crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
                        armv8_aes_crypt_xts_block(rounds, data_schedule,
                            &tweakreg, block, block, do_encrypt);
                        crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
                        seglen = AES_XTS_BLOCKSIZE;
                } else {
                        for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE;
                            seglen -= AES_XTS_BLOCKSIZE) {
                                armv8_aes_crypt_xts_block(rounds, data_schedule,
                                    &tweakreg, from, to, do_encrypt);
                                from += AES_XTS_BLOCKSIZE;
                                to += AES_XTS_BLOCKSIZE;
                        }
                        seglen = oseglen - seglen;
                        crypto_cursor_advance(fromc, seglen);
                        crypto_cursor_advance(toc, seglen);
                }
        }

        explicit_bzero(block, sizeof(block));
}

void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
    struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
{
        armv8_aes_crypt_xts(data_schedule->aes_rounds,
            (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
            toc, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
        armv8_aes_crypt_xts(data_schedule->aes_rounds,
            (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
            toc, iv, 0);
}

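/*
 * Increment the 128-bit GCM counter block in place.  GCM counters are
 * big-endian, so the increment starts at the last byte and ripples any
 * carry toward the front of the block.
 */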
#define AES_INC_COUNTER(counter)                        \
        do {                                            \
                for (int pos = AES_BLOCK_LEN - 1;       \
                    pos >= 0; pos--)                    \
                        if (++(counter)[pos])           \
                                break;                  \
        } while (0)

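/*
 * Per-request GCM state: aes_counter is the running CTR block, EKi the
 * current keystream block, Xi the GHASH accumulator, EK0 the encrypted
 * initial counter block used to mask the final tag, and lenblock the
 * AAD/payload bit lengths hashed in the final GHASH round.
 */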
struct armv8_gcm_state {
        __uint128_val_t EK0;
        __uint128_val_t EKi;
        __uint128_val_t Xi;
        __uint128_val_t lenblock;
        uint8_t aes_counter[AES_BLOCK_LEN];
};

static void
armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint8_t *authdata, size_t authdatalen,
    const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable)
{
        uint8_t block[AES_BLOCK_LEN];
        size_t trailer;

        bzero(s->aes_counter, AES_BLOCK_LEN);
        memcpy(s->aes_counter, iv, AES_GCM_IV_LEN);

        /* Setup the counter */
        s->aes_counter[AES_BLOCK_LEN - 1] = 1;

        /* EK0 for a final GMAC round */
        aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key);

        /* GCM starts with 2 as counter, 1 is used for final xor of tag. */
        s->aes_counter[AES_BLOCK_LEN - 1] = 2;

        memset(s->Xi.c, 0, sizeof(s->Xi.c));
        trailer = authdatalen % AES_BLOCK_LEN;
        if (authdatalen - trailer > 0) {
                gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer);
                authdata += authdatalen - trailer;
        }
        if (trailer > 0 || authdatalen == 0) {
                memset(block, 0, sizeof(block));
                memcpy(block, authdata, trailer);
                gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN);
        }
}

static void
armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
    size_t authdatalen, const __uint128_val_t *Htable)
{
        /* Lengths block */
        s->lenblock.u[0] = s->lenblock.u[1] = 0;
        s->lenblock.d[1] = htobe32(authdatalen * 8);
        s->lenblock.d[3] = htobe32(len * 8);
        gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN);

        s->Xi.u[0] ^= s->EK0.u[0];
        s->Xi.u[1] ^= s->EK0.u[1];
}

static void
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
        aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
        AES_INC_COUNTER(s->aes_counter);
        to[0] = from[0] ^ s->EKi.u[0];
        to[1] = from[1] ^ s->EKi.u[1];
}

static void
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
        /* In CTR mode, decryption is the same keystream XOR as encryption. */
        armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
}

void
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
        struct armv8_gcm_state s;
        uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
        uint64_t *from64, *to64;
        size_t fromseglen, i, olen, oseglen, seglen, toseglen;

        armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

        for (olen = len; len > 0; len -= seglen) {
                from64 = crypto_cursor_segment(fromc, &fromseglen);
                to64 = crypto_cursor_segment(toc, &toseglen);

                seglen = ulmin(len, ulmin(fromseglen, toseglen));
                if (seglen < AES_BLOCK_LEN) {
                        seglen = ulmin(len, AES_BLOCK_LEN);

                        memset(block, 0, sizeof(block));
                        crypto_cursor_copydata(fromc, (int)seglen, block);

                        if (seglen == AES_BLOCK_LEN) {
                                armv8_aes_encrypt_gcm_block(&s, aes_key,
                                    (uint64_t *)block, (uint64_t *)block);
                        } else {
                                aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
                                AES_INC_COUNTER(s.aes_counter);
                                for (i = 0; i < seglen; i++)
                                        block[i] ^= s.EKi.c[i];
                        }
                        gcm_ghash_v8(s.Xi.u, Htable, block, seglen);

                        crypto_cursor_copyback(toc, (int)seglen, block);
                } else {
                        for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
                            seglen -= AES_BLOCK_LEN) {
                                armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
                                    to64);
                                gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
                                    AES_BLOCK_LEN);

                                from64 += 2;
                                to64 += 2;
                        }

                        seglen = oseglen - seglen;
                        crypto_cursor_advance(fromc, seglen);
                        crypto_cursor_advance(toc, seglen);
                }
        }

        armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
        memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);

        explicit_bzero(block, sizeof(block));
        explicit_bzero(&s, sizeof(s));
}

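/*
 * Decryption makes two passes over the ciphertext: the first GHASHes it
 * through a private copy of the source cursor so the tag can be verified
 * before any plaintext is produced, and the second performs the CTR
 * decryption only once the tag has matched.
 */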
int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
        struct armv8_gcm_state s;
        struct crypto_buffer_cursor fromcc;
        uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
        uint64_t *block64, *from64, *to64;
        size_t fromseglen, olen, oseglen, seglen, toseglen;
        int error;

        armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

        crypto_cursor_copy(fromc, &fromcc);
        for (olen = len; len > 0; len -= seglen) {
                from = crypto_cursor_segment(&fromcc, &fromseglen);
                seglen = ulmin(len, fromseglen);
                seglen -= seglen % AES_BLOCK_LEN;
                if (seglen > 0) {
                        gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
                        crypto_cursor_advance(&fromcc, seglen);
                } else {
                        memset(block, 0, sizeof(block));
                        seglen = ulmin(len, AES_BLOCK_LEN);
                        crypto_cursor_copydata(&fromcc, seglen, block);
                        gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
                }
        }

        armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);

        if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
                error = EBADMSG;
                goto out;
        }

        block64 = (uint64_t *)block;
        for (len = olen; len > 0; len -= seglen) {
                from64 = crypto_cursor_segment(fromc, &fromseglen);
                to64 = crypto_cursor_segment(toc, &toseglen);

                seglen = ulmin(len, ulmin(fromseglen, toseglen));
                if (seglen < AES_BLOCK_LEN) {
                        seglen = ulmin(len, AES_BLOCK_LEN);

                        memset(block, 0, sizeof(block));
                        crypto_cursor_copydata(fromc, seglen, block);

                        armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
                            block64);

                        crypto_cursor_copyback(toc, (int)seglen, block);
                } else {
                        for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
                            seglen -= AES_BLOCK_LEN) {
                                armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
                                    to64);

                                from64 += 2;
                                to64 += 2;
                        }

                        seglen = oseglen - seglen;
                        crypto_cursor_advance(fromc, seglen);
                        crypto_cursor_advance(toc, seglen);
                }
        }

        error = 0;
out:
        explicit_bzero(block, sizeof(block));
        explicit_bzero(&s, sizeof(s));
        return (error);
}