/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled. Callers must enter a
 * floating-point context before calling any of these functions.
 */
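
/*
 * Illustrative sketch only (not part of this file): in-kernel callers would
 * typically bracket these routines with the fpu_kern_enter(9) /
 * fpu_kern_leave(9) KPI, roughly as follows; the exact flags depend on the
 * calling context.
 *
 *	fpu_kern_enter(curthread, NULL, FPU_KERN_NORMAL | FPU_KERN_NOCTX);
 *	armv8_aes_encrypt_cbc(key, len, fromc, toc, iv);
 *	fpu_kern_leave(curthread, NULL);
 */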

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

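/*
 * Run the AES encryption rounds on a single 16-byte block using the ARMv8
 * AESE/AESMC instructions.  "rounds" is the AES round count minus one, so
 * the key schedule holds rounds + 2 round keys.
 */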
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

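/*
 * Run the AES decryption rounds on a single 16-byte block using the ARMv8
 * AESD/AESIMC instructions.  The key schedule must be the decryption
 * (equivalent inverse cipher) schedule.
 */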
static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

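/*
 * CBC-encrypt "len" bytes (a multiple of the block size) from "fromc" to
 * "toc".  Both cursors may describe scatter/gather buffers; segments smaller
 * than one block are bounced through a stack buffer.
 */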
void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			tmp = vld1q_u8(block);
			tot = armv8_aes_enc(key->aes_rounds - 1,
			    (const void *)key->aes_key, veorq_u8(tmp, ivreg));
			ivreg = tot;
			vst1q_u8(block, tot);
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				tmp = vld1q_u8(from);
				tot = armv8_aes_enc(key->aes_rounds - 1,
				    (const void *)key->aes_key,
				    veorq_u8(tmp, ivreg));
				ivreg = tot;
				vst1q_u8(to, tot);
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

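/*
 * CBC-decrypt "len" bytes (a multiple of the block size) from "fromc" to
 * "toc".  Each ciphertext block is saved before it is overwritten so it can
 * serve as the IV for the following block.
 */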
void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	uint8_t block[AES_BLOCK_LEN], *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_BLOCK_LEN == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	ivreg = vld1q_u8(iv);
	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block);
			nextiv = vld1q_u8(block);
			tmp = armv8_aes_dec(key->aes_rounds - 1,
			    (const void *)key->aes_key, nextiv);
			vst1q_u8(block, veorq_u8(tmp, ivreg));
			ivreg = nextiv;
			crypto_cursor_copyback(toc, AES_BLOCK_LEN, block);
			seglen = AES_BLOCK_LEN;
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				nextiv = vld1q_u8(from);
				tmp = armv8_aes_dec(key->aes_rounds - 1,
				    (const void *)key->aes_key, nextiv);
				vst1q_u8(to, veorq_u8(tmp, ivreg));
				ivreg = nextiv;
				from += AES_BLOCK_LEN;
				to += AES_BLOCK_LEN;
			}
			crypto_cursor_advance(fromc, oseglen - seglen);
			crypto_cursor_advance(toc, oseglen - seglen);
			seglen = oseglen - seglen;
		}
	}

	explicit_bzero(block, sizeof(block));
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

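/*
 * Advance the XTS tweak to the next block: multiply the 128-bit tweak by
 * alpha (x) in GF(2^128), folding the carry out of the top bit back into the
 * low lane with the 0x87 reduction constant.
 */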
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return ret;
}

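/*
 * Encrypt or decrypt a single XTS block: XOR with the tweak, run the block
 * cipher, XOR with the tweak again, then advance the tweak for the caller.
 */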
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

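/*
 * Common XTS worker: derive the initial tweak from the IV with the tweak
 * key, then walk both cursors one segment at a time, bouncing any partial
 * segment through a stack buffer.
 */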
static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	uint8_t *from, *to;
	size_t fromseglen, oseglen, seglen, toseglen;

	KASSERT(len % AES_XTS_BLOCKSIZE == 0,
	    ("%s: length %zu not a multiple of the block size", __func__, len));

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	for (; len > 0; len -= seglen) {
		from = crypto_cursor_segment(fromc, &fromseglen);
		to = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_XTS_BLOCKSIZE) {
			crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block);
			armv8_aes_crypt_xts_block(rounds, data_schedule,
			    &tweakreg, block, block, do_encrypt);
			crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block);
			seglen = AES_XTS_BLOCKSIZE;
		} else {
			for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE;
			    seglen -= AES_XTS_BLOCKSIZE) {
				armv8_aes_crypt_xts_block(rounds, data_schedule,
				    &tweakreg, from, to, do_encrypt);
				from += AES_XTS_BLOCKSIZE;
				to += AES_XTS_BLOCKSIZE;
			}
			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	explicit_bzero(block, sizeof(block));
}

void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc,
    struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    const uint8_t iv[static AES_BLOCK_LEN])
{
	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc,
	    toc, iv, 0);
}
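
/* Increment a big-endian block counter, carrying from the last byte up. */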
#define	AES_INC_COUNTER(counter)				\
	do {							\
		for (int pos = AES_BLOCK_LEN - 1;		\
		     pos >= 0; pos--)				\
			if (++(counter)[pos])			\
				break;				\
	} while (0)

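/*
 * Per-request AES-GCM state: EK0 is the encrypted initial counter block used
 * to finalize the tag, EKi is the current keystream block, Xi is the GHASH
 * accumulator, lenblock holds the final AAD/payload bit lengths, and
 * aes_counter is the CTR-mode counter block.
 */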
struct armv8_gcm_state {
	__uint128_val_t EK0;
	__uint128_val_t EKi;
	__uint128_val_t Xi;
	__uint128_val_t lenblock;
	uint8_t aes_counter[AES_BLOCK_LEN];
};

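/*
 * Initialize the GCM state: load the IV into the counter block, compute EK0
 * for the final tag, and absorb the additional authenticated data into the
 * GHASH accumulator, zero-padding any partial trailing block.
 */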
static void
armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint8_t *authdata, size_t authdatalen,
    const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable)
{
	uint8_t block[AES_BLOCK_LEN];
	size_t trailer;

	bzero(s->aes_counter, AES_BLOCK_LEN);
	memcpy(s->aes_counter, iv, AES_GCM_IV_LEN);

	/* Setup the counter */
	s->aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for a final GMAC round */
	aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key);

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s->aes_counter[AES_BLOCK_LEN - 1] = 2;

	memset(s->Xi.c, 0, sizeof(s->Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN);
	}
}

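/*
 * Finish GHASH by absorbing the bit lengths of the AAD and payload, then
 * XOR in EK0 to produce the tag in s->Xi.
 */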
static void
armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len,
    size_t authdatalen, const __uint128_val_t *Htable)
{
	/* Lengths block */
	s->lenblock.u[0] = s->lenblock.u[1] = 0;
	s->lenblock.d[1] = htobe32(authdatalen * 8);
	s->lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN);

	s->Xi.u[0] ^= s->EK0.u[0];
	s->Xi.u[1] ^= s->EK0.u[1];
}

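/*
 * Produce one CTR keystream block for the current counter value and XOR it
 * with a full block of input.
 */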
static void
armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
	aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key);
	AES_INC_COUNTER(s->aes_counter);
	to[0] = from[0] ^ s->EKi.u[0];
	to[1] = from[1] ^ s->EKi.u[1];
}

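/* CTR-mode decryption applies the same keystream XOR as encryption. */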
static void
armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key,
    const uint64_t *from, uint64_t *to)
{
	armv8_aes_encrypt_gcm_block(s, aes_key, from, to);
}

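/*
 * AES-GCM encryption: absorb the AAD, encrypt the payload in CTR mode while
 * feeding the ciphertext into GHASH, then emit the tag.  Partial segments
 * and the trailing partial block go through a zero-padded stack buffer.
 */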
void
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN);
	uint64_t *from64, *to64;
	size_t fromseglen, i, olen, oseglen, seglen, toseglen;

	armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

	for (olen = len; len > 0; len -= seglen) {
		from64 = crypto_cursor_segment(fromc, &fromseglen);
		to64 = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			seglen = ulmin(len, AES_BLOCK_LEN);

			memset(block, 0, sizeof(block));
			crypto_cursor_copydata(fromc, (int)seglen, block);

			if (seglen == AES_BLOCK_LEN) {
				armv8_aes_encrypt_gcm_block(&s, aes_key,
				    (uint64_t *)block, (uint64_t *)block);
			} else {
				aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
				AES_INC_COUNTER(s.aes_counter);
				for (i = 0; i < seglen; i++)
					block[i] ^= s.EKi.c[i];
			}
			gcm_ghash_v8(s.Xi.u, Htable, block, seglen);

			crypto_cursor_copyback(toc, (int)seglen, block);
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				armv8_aes_encrypt_gcm_block(&s, aes_key, from64,
				    to64);
				gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64,
				    AES_BLOCK_LEN);

				from64 += 2;
				to64 += 2;
			}

			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);
	memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);

	explicit_bzero(block, sizeof(block));
	explicit_bzero(&s, sizeof(s));
}

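/*
 * AES-GCM decryption is done in two passes over the ciphertext: the tag is
 * first recomputed with GHASH and compared against the supplied tag in
 * constant time; only if they match is the payload decrypted in CTR mode.
 * Returns 0 on success or EBADMSG if authentication fails.
 */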
int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	struct crypto_buffer_cursor fromcc;
	uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from;
	uint64_t *block64, *from64, *to64;
	size_t fromseglen, olen, oseglen, seglen, toseglen;
	int error;

	armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable);

	crypto_cursor_copy(fromc, &fromcc);
	for (olen = len; len > 0; len -= seglen) {
		from = crypto_cursor_segment(&fromcc, &fromseglen);
		seglen = ulmin(len, fromseglen);
		seglen -= seglen % AES_BLOCK_LEN;
		if (seglen > 0) {
			gcm_ghash_v8(s.Xi.u, Htable, from, seglen);
			crypto_cursor_advance(&fromcc, seglen);
		} else {
			memset(block, 0, sizeof(block));
			seglen = ulmin(len, AES_BLOCK_LEN);
			crypto_cursor_copydata(&fromcc, seglen, block);
			gcm_ghash_v8(s.Xi.u, Htable, block, seglen);
		}
	}

	armv8_aes_gmac_finish(&s, olen, authdatalen, Htable);

	if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
		error = EBADMSG;
		goto out;
	}

	block64 = (uint64_t *)block;
	for (len = olen; len > 0; len -= seglen) {
		from64 = crypto_cursor_segment(fromc, &fromseglen);
		to64 = crypto_cursor_segment(toc, &toseglen);

		seglen = ulmin(len, ulmin(fromseglen, toseglen));
		if (seglen < AES_BLOCK_LEN) {
			seglen = ulmin(len, AES_BLOCK_LEN);

			memset(block, 0, sizeof(block));
			crypto_cursor_copydata(fromc, seglen, block);

			armv8_aes_decrypt_gcm_block(&s, aes_key, block64,
			    block64);

			crypto_cursor_copyback(toc, (int)seglen, block);
		} else {
			for (oseglen = seglen; seglen >= AES_BLOCK_LEN;
			    seglen -= AES_BLOCK_LEN) {
				armv8_aes_decrypt_gcm_block(&s, aes_key, from64,
				    to64);

				from64 += 2;
				to64 += 2;
			}

			seglen = oseglen - seglen;
			crypto_cursor_advance(fromc, seglen);
			crypto_cursor_advance(toc, seglen);
		}
	}

	error = 0;
out:
	explicit_bzero(block, sizeof(block));
	explicit_bzero(&s, sizeof(s));
	return (error);
}