1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/zfs_context.h>
27 #include <sys/cmn_err.h>
28 #include <modes/modes.h>
29 #include <sys/crypto/common.h>
30 #include <sys/crypto/icp.h>
31 #include <sys/crypto/impl.h>
32 #include <sys/byteorder.h>
33 #include <sys/simd.h>
34 #include <modes/gcm_impl.h>
35 #ifdef CAN_USE_GCM_ASM
36 #include <aes/aes_impl.h>
37 #endif
38
39 #define GHASH(c, d, t, o) \
40 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
41 (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
42 (uint64_t *)(void *)(t));
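/*
 * The GHASH macro performs a single GHASH update step: the 16-byte block d
 * is XORed into the running hash and the result is multiplied by the subkey
 * H in GF(2^128) via the mul() hook of the selected implementation (o).
 * A typical invocation, as used throughout this file:
 *
 *	const gcm_impl_ops_t *gops = gcm_impl_get_ops();
 *	GHASH(ctx, blockp, ctx->gcm_ghash, gops);
 */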
43
44 /* Select GCM implementation */
45 #define IMPL_FASTEST (UINT32_MAX)
46 #define IMPL_CYCLE (UINT32_MAX-1)
47 #ifdef CAN_USE_GCM_ASM
48 #define IMPL_AVX (UINT32_MAX-2)
49 #if CAN_USE_GCM_ASM >= 2
50 #define IMPL_AVX2 (UINT32_MAX-3)
51 #endif
52 #endif
53 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
54 static uint32_t icp_gcm_impl = IMPL_FASTEST;
55 static uint32_t user_sel_impl = IMPL_FASTEST;
56
57 #ifdef CAN_USE_GCM_ASM
58 /* Does the architecture we run on support the MOVBE instruction? */
59 boolean_t gcm_avx_can_use_movbe = B_FALSE;
60 /*
61 * Whether to use the optimized openssl gcm and ghash implementations.
62 */
63 static gcm_impl gcm_impl_used = GCM_IMPL_GENERIC;
64 #define GCM_IMPL_USED (*(volatile gcm_impl *)&gcm_impl_used)
65
66 extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
67
68 static inline boolean_t gcm_avx_will_work(void);
69 static inline boolean_t gcm_avx2_will_work(void);
70 static inline void gcm_use_impl(gcm_impl impl);
71 static inline gcm_impl gcm_toggle_impl(void);
72
73 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
74 crypto_data_t *, size_t);
75
76 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
77 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
78 static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *,
79 size_t, size_t);
80 #endif /* ifdef CAN_USE_GCM_ASM */
81
82 /*
83 * Encrypt multiple blocks of data in GCM mode. Decryption for GCM mode
84 * is done in another function.
85 */
86 int
87 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
88 crypto_data_t *out, size_t block_size,
89 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
90 void (*copy_block)(uint8_t *, uint8_t *),
91 void (*xor_block)(uint8_t *, uint8_t *))
92 {
93 #ifdef CAN_USE_GCM_ASM
94 if (ctx->impl != GCM_IMPL_GENERIC)
95 return (gcm_mode_encrypt_contiguous_blocks_avx(
96 ctx, data, length, out, block_size));
97 #endif
98
99 const gcm_impl_ops_t *gops;
100 size_t remainder = length;
101 size_t need = 0;
102 uint8_t *datap = (uint8_t *)data;
103 uint8_t *blockp;
104 uint8_t *lastp;
105 void *iov_or_mp;
106 offset_t offset;
107 uint8_t *out_data_1;
108 uint8_t *out_data_2;
109 size_t out_data_1_len;
110 uint64_t counter;
111 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
112
113 if (length + ctx->gcm_remainder_len < block_size) {
114 /* accumulate bytes here and return */
115 memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
116 datap,
117 length);
118 ctx->gcm_remainder_len += length;
119 if (ctx->gcm_copy_to == NULL) {
120 ctx->gcm_copy_to = datap;
121 }
122 return (CRYPTO_SUCCESS);
123 }
124
125 crypto_init_ptrs(out, &iov_or_mp, &offset);
126
127 gops = gcm_impl_get_ops();
128 do {
129 /* Unprocessed data from last call. */
130 if (ctx->gcm_remainder_len > 0) {
131 need = block_size - ctx->gcm_remainder_len;
132
133 if (need > remainder)
134 return (CRYPTO_DATA_LEN_RANGE);
135
136 memcpy(&((uint8_t *)ctx->gcm_remainder)
137 [ctx->gcm_remainder_len], datap, need);
138
139 blockp = (uint8_t *)ctx->gcm_remainder;
140 } else {
141 blockp = datap;
142 }
143
144 /*
145 * Increment counter. Counter bits are confined
146 * to the bottom 32 bits of the counter block.
147 */
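/*
 * The counter block is kept in big-endian byte order, so the 32-bit
 * counter occupying the last four bytes of gcm_cb[1] is extracted with
 * the mask, converted to host order, incremented, converted back and
 * merged into the otherwise unchanged upper bytes.
 */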
148 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
149 counter = htonll(counter + 1);
150 counter &= counter_mask;
151 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
152
153 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
154 (uint8_t *)ctx->gcm_tmp);
155 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
156
157 lastp = (uint8_t *)ctx->gcm_tmp;
158
159 ctx->gcm_processed_data_len += block_size;
160
161 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
162 &out_data_1_len, &out_data_2, block_size);
163
164 /* copy block to where it belongs */
165 if (out_data_1_len == block_size) {
166 copy_block(lastp, out_data_1);
167 } else {
168 memcpy(out_data_1, lastp, out_data_1_len);
169 if (out_data_2 != NULL) {
170 memcpy(out_data_2,
171 lastp + out_data_1_len,
172 block_size - out_data_1_len);
173 }
174 }
175 /* update offset */
176 out->cd_offset += block_size;
177
178 /* add ciphertext to the hash */
179 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
180
181 /* Update pointer to next block of data to be processed. */
182 if (ctx->gcm_remainder_len != 0) {
183 datap += need;
184 ctx->gcm_remainder_len = 0;
185 } else {
186 datap += block_size;
187 }
188
189 remainder = (size_t)&data[length] - (size_t)datap;
190
191 /* Incomplete last block. */
192 if (remainder > 0 && remainder < block_size) {
193 memcpy(ctx->gcm_remainder, datap, remainder);
194 ctx->gcm_remainder_len = remainder;
195 ctx->gcm_copy_to = datap;
196 goto out;
197 }
198 ctx->gcm_copy_to = NULL;
199
200 } while (remainder > 0);
201 out:
202 return (CRYPTO_SUCCESS);
203 }
204
205 int
206 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
207 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
208 void (*copy_block)(uint8_t *, uint8_t *),
209 void (*xor_block)(uint8_t *, uint8_t *))
210 {
211 (void) copy_block;
212 #ifdef CAN_USE_GCM_ASM
213 if (ctx->impl != GCM_IMPL_GENERIC)
214 return (gcm_encrypt_final_avx(ctx, out, block_size));
215 #endif
216
217 const gcm_impl_ops_t *gops;
218 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
219 uint8_t *ghash, *macp = NULL;
220 int i, rv;
221
222 if (out->cd_length <
223 (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
224 return (CRYPTO_DATA_LEN_RANGE);
225 }
226
227 gops = gcm_impl_get_ops();
228 ghash = (uint8_t *)ctx->gcm_ghash;
229
230 if (ctx->gcm_remainder_len > 0) {
231 uint64_t counter;
232 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
233
234 /*
235 * Here is where we deal with data that is not a
236 * multiple of the block size.
237 */
238
239 /*
240 * Increment counter.
241 */
242 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
243 counter = htonll(counter + 1);
244 counter &= counter_mask;
245 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
246
247 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
248 (uint8_t *)ctx->gcm_tmp);
249
250 macp = (uint8_t *)ctx->gcm_remainder;
251 memset(macp + ctx->gcm_remainder_len, 0,
252 block_size - ctx->gcm_remainder_len);
253
254 /* XOR with counter block */
255 for (i = 0; i < ctx->gcm_remainder_len; i++) {
256 macp[i] ^= tmpp[i];
257 }
258
259 /* add ciphertext to the hash */
260 GHASH(ctx, macp, ghash, gops);
261
262 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
263 }
264
265 ctx->gcm_len_a_len_c[1] =
266 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
267 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
268 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
269 (uint8_t *)ctx->gcm_J0);
270 xor_block((uint8_t *)ctx->gcm_J0, ghash);
271
272 if (ctx->gcm_remainder_len > 0) {
273 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
274 if (rv != CRYPTO_SUCCESS)
275 return (rv);
276 }
277 out->cd_offset += ctx->gcm_remainder_len;
278 ctx->gcm_remainder_len = 0;
279 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
280 if (rv != CRYPTO_SUCCESS)
281 return (rv);
282 out->cd_offset += ctx->gcm_tag_len;
283
284 return (CRYPTO_SUCCESS);
285 }
286
287 /*
288 * This only deals with decrypting the trailing portion of the input
289 * that is not a multiple of the block length.
290 */
291 static void
292 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
293 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
294 void (*xor_block)(uint8_t *, uint8_t *))
295 {
296 uint8_t *datap, *outp, *counterp;
297 uint64_t counter;
298 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
299 int i;
300
301 /*
302 * Increment counter.
303 * Counter bits are confined to the bottom 32 bits
304 */
305 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
306 counter = htonll(counter + 1);
307 counter &= counter_mask;
308 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
309
310 datap = (uint8_t *)ctx->gcm_remainder;
311 outp = &((ctx->gcm_pt_buf)[index]);
312 counterp = (uint8_t *)ctx->gcm_tmp;
313
314 /* zero pad the remaining ciphertext for the hash */
315 memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
316 memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);
317
318 /* add ciphertext to the hash */
319 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
320
321 /* decrypt remaining ciphertext */
322 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
323
324 /* XOR with counter block */
325 for (i = 0; i < ctx->gcm_remainder_len; i++) {
326 outp[i] = datap[i] ^ counterp[i];
327 }
328 }
329
330 int
331 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
332 crypto_data_t *out, size_t block_size,
333 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
334 void (*copy_block)(uint8_t *, uint8_t *),
335 void (*xor_block)(uint8_t *, uint8_t *))
336 {
337 (void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
338 (void) xor_block;
339 size_t new_len;
340 uint8_t *new;
341
342 /*
343 * Copy contiguous ciphertext input blocks to plaintext buffer.
344 * Ciphertext will be decrypted in the final.
345 */
346 if (length > 0) {
347 new_len = ctx->gcm_pt_buf_len + length;
348 new = vmem_alloc(new_len, KM_SLEEP);
349 if (new == NULL) {
350 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
351 ctx->gcm_pt_buf = NULL;
352 return (CRYPTO_HOST_MEMORY);
353 }
354
355 if (ctx->gcm_pt_buf != NULL) {
356 memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
357 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
358 } else {
359 ASSERT0(ctx->gcm_pt_buf_len);
360 }
361
362 ctx->gcm_pt_buf = new;
363 ctx->gcm_pt_buf_len = new_len;
364 memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
365 length);
366 ctx->gcm_processed_data_len += length;
367 }
368
369 ctx->gcm_remainder_len = 0;
370 return (CRYPTO_SUCCESS);
371 }
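/*
 * Note on the accumulate-only path above: the authentication tag occupies
 * the last gcm_tag_len bytes of the accumulated input, so decryption and
 * tag verification are deferred to gcm_decrypt_final() (or its avx
 * counterpart), which knows the total length and can split gcm_pt_buf
 * into ciphertext and tag.
 */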
372
373 int
374 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
375 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
376 void (*xor_block)(uint8_t *, uint8_t *))
377 {
378 #ifdef CAN_USE_GCM_ASM
379 if (ctx->impl != GCM_IMPL_GENERIC)
380 return (gcm_decrypt_final_avx(ctx, out, block_size));
381 #endif
382
383 const gcm_impl_ops_t *gops;
384 size_t pt_len;
385 size_t remainder;
386 uint8_t *ghash;
387 uint8_t *blockp;
388 uint8_t *cbp;
389 uint64_t counter;
390 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
391 int processed = 0, rv;
392
393 ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
394
395 gops = gcm_impl_get_ops();
396 pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
397 ghash = (uint8_t *)ctx->gcm_ghash;
398 blockp = ctx->gcm_pt_buf;
399 remainder = pt_len;
400 while (remainder > 0) {
401 /* Incomplete last block */
402 if (remainder < block_size) {
403 memcpy(ctx->gcm_remainder, blockp, remainder);
404 ctx->gcm_remainder_len = remainder;
405 /*
406 * not expecting any more ciphertext, just
407 * compute plaintext for the remaining input
408 */
409 gcm_decrypt_incomplete_block(ctx, block_size,
410 processed, encrypt_block, xor_block);
411 ctx->gcm_remainder_len = 0;
412 goto out;
413 }
414 /* add ciphertext to the hash */
415 GHASH(ctx, blockp, ghash, gops);
416
417 /*
418 * Increment counter.
419 * Counter bits are confined to the bottom 32 bits
420 */
421 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
422 counter = htonll(counter + 1);
423 counter &= counter_mask;
424 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
425
426 cbp = (uint8_t *)ctx->gcm_tmp;
427 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
428
429 /* XOR with ciphertext */
430 xor_block(cbp, blockp);
431
432 processed += block_size;
433 blockp += block_size;
434 remainder -= block_size;
435 }
436 out:
437 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
438 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
439 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
440 (uint8_t *)ctx->gcm_J0);
441 xor_block((uint8_t *)ctx->gcm_J0, ghash);
442
443 /* compare the input authentication tag with what we calculated */
444 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
445 /* They don't match */
446 return (CRYPTO_INVALID_MAC);
447 } else {
448 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
449 if (rv != CRYPTO_SUCCESS)
450 return (rv);
451 out->cd_offset += pt_len;
452 }
453 return (CRYPTO_SUCCESS);
454 }
455
456 static int
457 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
458 {
459 size_t tag_len;
460
461 /*
462 * Check the length of the authentication tag (in bits).
463 */
464 tag_len = gcm_param->ulTagBits;
465 switch (tag_len) {
466 case 32:
467 case 64:
468 case 96:
469 case 104:
470 case 112:
471 case 120:
472 case 128:
473 break;
474 default:
475 return (CRYPTO_MECHANISM_PARAM_INVALID);
476 }
477
478 if (gcm_param->ulIvLen == 0)
479 return (CRYPTO_MECHANISM_PARAM_INVALID);
480
481 return (CRYPTO_SUCCESS);
482 }
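/*
 * A minimal parameter sketch that passes gcm_validate_args(); the variable
 * names are illustrative and AES_BLOCK_LEN plus the aes_*_block helpers
 * are assumed to be supplied by the AES provider:
 *
 *	CK_AES_GCM_PARAMS params = {
 *		.pIv = iv,		.ulIvLen = 12,
 *		.pAAD = aad,		.ulAADLen = aad_len,
 *		.ulTagBits = 128,
 *	};
 *	rv = gcm_init_ctx(ctx, (char *)&params, AES_BLOCK_LEN,
 *	    aes_encrypt_block, aes_copy_block, aes_xor_block);
 */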
483
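/*
 * Build the pre-counter block J0 from the IV as specified by NIST SP
 * 800-38D: for the common 12-byte IV, J0 = IV || 0^31 || 1; for any other
 * IV length, J0 = GHASH(IV zero-padded to a block boundary || 64-bit zero
 * || 64-bit IV bit length). J0 seeds the counter block and is saved for
 * the final tag computation.
 */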
484 static void
485 gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len,
486 gcm_ctx_t *ctx, size_t block_size,
487 void (*copy_block)(uint8_t *, uint8_t *),
488 void (*xor_block)(uint8_t *, uint8_t *))
489 {
490 const gcm_impl_ops_t *gops;
491 uint8_t *cb;
492 ulong_t remainder = iv_len;
493 ulong_t processed = 0;
494 uint8_t *datap, *ghash;
495 uint64_t len_a_len_c[2];
496
497 gops = gcm_impl_get_ops();
498 ghash = (uint8_t *)ctx->gcm_ghash;
499 cb = (uint8_t *)ctx->gcm_cb;
500 if (iv_len == 12) {
501 memcpy(cb, iv, 12);
502 cb[12] = 0;
503 cb[13] = 0;
504 cb[14] = 0;
505 cb[15] = 1;
506 /* J0 will be used again in the final */
507 copy_block(cb, (uint8_t *)ctx->gcm_J0);
508 } else {
509 /* GHASH the IV */
510 do {
511 if (remainder < block_size) {
512 memset(cb, 0, block_size);
513 memcpy(cb, &(iv[processed]), remainder);
514 datap = (uint8_t *)cb;
515 remainder = 0;
516 } else {
517 datap = (uint8_t *)(&(iv[processed]));
518 processed += block_size;
519 remainder -= block_size;
520 }
521 GHASH(ctx, datap, ghash, gops);
522 } while (remainder > 0);
523
524 len_a_len_c[0] = 0;
525 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
526 GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
527
528 /* J0 will be used again in the final */
529 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
530 }
531 }
532
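/*
 * Common GCM setup for the generic code path: derive the hash subkey
 * H = E_K(0^128), format J0 and the initial counter block, then GHASH the
 * additional authenticated data, zero-padded to a block boundary.
 */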
533 static int
534 gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
535 const uint8_t *auth_data, size_t auth_data_len, size_t block_size,
536 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
537 void (*copy_block)(uint8_t *, uint8_t *),
538 void (*xor_block)(uint8_t *, uint8_t *))
539 {
540 const gcm_impl_ops_t *gops;
541 uint8_t *ghash, *datap, *authp;
542 size_t remainder, processed;
543
544 /* encrypt zero block to get subkey H */
545 memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
546 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
547 (uint8_t *)ctx->gcm_H);
548
549 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
550 copy_block, xor_block);
551
552 gops = gcm_impl_get_ops();
553 authp = (uint8_t *)ctx->gcm_tmp;
554 ghash = (uint8_t *)ctx->gcm_ghash;
555 memset(authp, 0, block_size);
556 memset(ghash, 0, block_size);
557
558 processed = 0;
559 remainder = auth_data_len;
560 do {
561 if (remainder < block_size) {
562 /*
563 * There's not a full block of data, pad the rest of
564 * the buffer with zeros.
565 */
566
567 if (auth_data != NULL) {
568 memset(authp, 0, block_size);
569 memcpy(authp, &(auth_data[processed]),
570 remainder);
571 } else {
572 ASSERT0(remainder);
573 }
574
575 datap = (uint8_t *)authp;
576 remainder = 0;
577 } else {
578 datap = (uint8_t *)(&(auth_data[processed]));
579 processed += block_size;
580 remainder -= block_size;
581 }
582
583 /* add auth data to the hash */
584 GHASH(ctx, datap, ghash, gops);
585
586 } while (remainder > 0);
587
588 return (CRYPTO_SUCCESS);
589 }
590
591 /*
592 * Init the GCM context struct. Handle the cycle and avx implementations here.
593 */
594 int
595 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param,
596 size_t block_size, int (*encrypt_block)(const void *, const uint8_t *,
597 uint8_t *), void (*copy_block)(uint8_t *, uint8_t *),
598 void (*xor_block)(uint8_t *, uint8_t *))
599 {
600 CK_AES_GCM_PARAMS *gcm_param;
601 int rv = CRYPTO_SUCCESS;
602 size_t tag_len, iv_len;
603
604 if (param != NULL) {
605 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
606
607 /* GCM mode. */
608 if ((rv = gcm_validate_args(gcm_param)) != 0) {
609 return (rv);
610 }
611 gcm_ctx->gcm_flags |= GCM_MODE;
612
613 size_t tbits = gcm_param->ulTagBits;
614 tag_len = CRYPTO_BITS2BYTES(tbits);
615 iv_len = gcm_param->ulIvLen;
616
617 gcm_ctx->gcm_tag_len = tag_len;
618 gcm_ctx->gcm_processed_data_len = 0;
619
620 /* these values are in bits */
621 gcm_ctx->gcm_len_a_len_c[0]
622 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
623 } else {
624 return (CRYPTO_MECHANISM_PARAM_INVALID);
625 }
626
627 const uint8_t *iv = (const uint8_t *)gcm_param->pIv;
628 const uint8_t *aad = (const uint8_t *)gcm_param->pAAD;
629 size_t aad_len = gcm_param->ulAADLen;
630
631 #ifdef CAN_USE_GCM_ASM
632 boolean_t needs_bswap =
633 ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap;
634
635 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
636 gcm_ctx->impl = GCM_IMPL_USED;
637 } else {
638 /*
639 * Handle the "cycle" implementation by creating different
640 * contexts, one per implementation.
641 */
642 gcm_ctx->impl = gcm_toggle_impl();
643
644 /* The AVX impl. doesn't handle byte swapped key schedules. */
645 if (needs_bswap == B_TRUE) {
646 gcm_ctx->impl = GCM_IMPL_GENERIC;
647 }
648 /*
649 * If this is an AVX context, use the MOVBE and the BSWAP
650 * variants alternately.
651 */
652 if (gcm_ctx->impl == GCM_IMPL_AVX &&
653 zfs_movbe_available() == B_TRUE) {
654 (void) atomic_toggle_boolean_nv(
655 (volatile boolean_t *)&gcm_avx_can_use_movbe);
656 }
657 }
658 /*
659 * We don't handle byte swapped key schedules in the avx code path,
660 * but they could still be created by the aes generic implementation.
661 * Make sure not to use them since we'll corrupt data if we do.
662 */
663 if (gcm_ctx->impl != GCM_IMPL_GENERIC && needs_bswap == B_TRUE) {
664 gcm_ctx->impl = GCM_IMPL_GENERIC;
665
666 cmn_err_once(CE_WARN,
667 "ICP: Can't use the aes generic or cycle implementations "
668 "in combination with the gcm avx or avx2-vaes "
669 "implementation!");
670 cmn_err_once(CE_WARN,
671 "ICP: Falling back to a compatible implementation, "
672 "aes-gcm performance will likely be degraded.");
673 cmn_err_once(CE_WARN,
674 "ICP: Choose at least the x86_64 aes implementation to "
675 "restore performance.");
676 }
677
678 /*
679 * The AVX implementations use an Htable whose size depends on the
680 * selected implementation.
681 */
682 if (gcm_ctx->impl != GCM_IMPL_GENERIC) {
683 rv = gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len,
684 block_size);
685 }
686 else
687 #endif /* ifdef CAN_USE_GCM_ASM */
688 if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size,
689 encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) {
690 rv = CRYPTO_MECHANISM_PARAM_INVALID;
691 }
692
693 return (rv);
694 }
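/*
 * For reference, a hypothetical encryption flow through this file; the AES
 * entry points and AES_BLOCK_LEN are assumptions reflecting how an AES
 * provider would typically drive these routines:
 *
 *	rv = gcm_init_ctx(ctx, (char *)&params, AES_BLOCK_LEN,
 *	    aes_encrypt_block, aes_copy_block, aes_xor_block);
 *	if (rv == CRYPTO_SUCCESS)
 *		rv = gcm_mode_encrypt_contiguous_blocks(ctx, pt, pt_len, out,
 *		    AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
 *		    aes_xor_block);
 *	if (rv == CRYPTO_SUCCESS)
 *		rv = gcm_encrypt_final(ctx, out, AES_BLOCK_LEN,
 *		    aes_encrypt_block, aes_copy_block, aes_xor_block);
 */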
695
696 void *
697 gcm_alloc_ctx(int kmflag)
698 {
699 gcm_ctx_t *gcm_ctx;
700
701 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
702 return (NULL);
703
704 gcm_ctx->gcm_flags = GCM_MODE;
705 return (gcm_ctx);
706 }
707
708 /* GCM implementation that contains the fastest methods */
709 static gcm_impl_ops_t gcm_fastest_impl = {
710 .name = "fastest"
711 };
712
713 /* All compiled in implementations */
714 static const gcm_impl_ops_t *gcm_all_impl[] = {
715 &gcm_generic_impl,
716 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
717 &gcm_pclmulqdq_impl,
718 #endif
719 };
720
721 /* Indicate that benchmark has been completed */
722 static boolean_t gcm_impl_initialized = B_FALSE;
723
724 /* Hold all supported implementations */
725 static size_t gcm_supp_impl_cnt = 0;
726 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
727
728 /*
729 * Returns the GCM operations for encrypt/decrypt/key setup. When a
730 * SIMD implementation is not allowed in the current context, fall
731 * back to the generic implementation.
732 */
733 const gcm_impl_ops_t *
734 gcm_impl_get_ops(void)
735 {
736 if (!kfpu_allowed())
737 return (&gcm_generic_impl);
738
739 const gcm_impl_ops_t *ops = NULL;
740 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
741
742 switch (impl) {
743 case IMPL_FASTEST:
744 ASSERT(gcm_impl_initialized);
745 ops = &gcm_fastest_impl;
746 break;
747 case IMPL_CYCLE:
748 /* Cycle through supported implementations */
749 ASSERT(gcm_impl_initialized);
750 ASSERT3U(gcm_supp_impl_cnt, >, 0);
751 static size_t cycle_impl_idx = 0;
752 size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
753 ops = gcm_supp_impl[idx];
754 break;
755 #ifdef CAN_USE_GCM_ASM
756 case IMPL_AVX:
757 #if CAN_USE_GCM_ASM >= 2
758 case IMPL_AVX2:
759 #endif
760 /*
761 * Make sure that we return a valid implementation while
762 * switching to the avx implementation since there still
763 * may be unfinished non-avx contexts around.
764 */
765 ops = &gcm_generic_impl;
766 break;
767 #endif
768 default:
769 ASSERT3U(impl, <, gcm_supp_impl_cnt);
770 ASSERT3U(gcm_supp_impl_cnt, >, 0);
771 if (impl < ARRAY_SIZE(gcm_all_impl))
772 ops = gcm_supp_impl[impl];
773 break;
774 }
775
776 ASSERT3P(ops, !=, NULL);
777
778 return (ops);
779 }
780
781 /*
782 * Initialize all supported implementations.
783 */
784 void
785 gcm_impl_init(void)
786 {
787 gcm_impl_ops_t *curr_impl;
788 int i, c;
789
790 /* Move supported implementations into gcm_supp_impl */
791 for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
792 curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
793
794 if (curr_impl->is_supported())
795 gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
796 }
797 gcm_supp_impl_cnt = c;
798
799 /*
800 * Set the fastest implementation given the assumption that the
801 * hardware accelerated version is the fastest.
802 */
803 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
804 if (gcm_pclmulqdq_impl.is_supported()) {
805 memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
806 sizeof (gcm_fastest_impl));
807 } else
808 #endif
809 {
810 memcpy(&gcm_fastest_impl, &gcm_generic_impl,
811 sizeof (gcm_fastest_impl));
812 }
813
814 strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
815
816 #ifdef CAN_USE_GCM_ASM
817 /*
818 * Use the avx implementation if it's available and the implementation
819 * hasn't changed from its default value of fastest on module load.
820 */
821 #if CAN_USE_GCM_ASM >= 2
822 if (gcm_avx2_will_work()) {
823 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
824 gcm_use_impl(GCM_IMPL_AVX2);
825 }
826 } else
827 #endif
828 if (gcm_avx_will_work()) {
829 #ifdef HAVE_MOVBE
830 if (zfs_movbe_available() == B_TRUE) {
831 atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
832 }
833 #endif
834 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
835 gcm_use_impl(GCM_IMPL_AVX);
836 }
837 }
838 #endif
839 /* Finish initialization */
840 atomic_swap_32(&icp_gcm_impl, user_sel_impl);
841 gcm_impl_initialized = B_TRUE;
842 }
843
844 static const struct {
845 const char *name;
846 uint32_t sel;
847 } gcm_impl_opts[] = {
848 { "cycle", IMPL_CYCLE },
849 { "fastest", IMPL_FASTEST },
850 #ifdef CAN_USE_GCM_ASM
851 { "avx", IMPL_AVX },
852 { "avx2-vaes", IMPL_AVX2 },
853 #endif
854 };
855
856 /*
857 * Function sets desired gcm implementation.
858 *
859 * If we are called before init(), the user preference will be saved in
860 * user_sel_impl and applied in a later init() call. This occurs when the
861 * module parameter is specified on module load. Otherwise, icp_gcm_impl
862 * is updated directly.
863 *
864 * @val Name of gcm implementation to use
865 * @param Unused.
866 */
867 int
868 gcm_impl_set(const char *val)
869 {
870 int err = -EINVAL;
871 char req_name[GCM_IMPL_NAME_MAX];
872 uint32_t impl = GCM_IMPL_READ(user_sel_impl);
873 size_t i;
874
875 /* sanitize input */
876 i = strnlen(val, GCM_IMPL_NAME_MAX);
877 if (i == 0 || i >= GCM_IMPL_NAME_MAX)
878 return (err);
879
880 strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
881 while (i > 0 && isspace(req_name[i-1]))
882 i--;
883 req_name[i] = '\0';
884
885 /* Check mandatory options */
886 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
887 #ifdef CAN_USE_GCM_ASM
888 #if CAN_USE_GCM_ASM >= 2
889 /* Ignore avx implementation if it won't work. */
890 if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
891 !gcm_avx2_will_work()) {
892 continue;
893 }
894 #endif
895 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
896 continue;
897 }
898 #endif
899 if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
900 impl = gcm_impl_opts[i].sel;
901 err = 0;
902 break;
903 }
904 }
905
906 /* check all supported impl if init() was already called */
907 if (err != 0 && gcm_impl_initialized) {
908 /* check all supported implementations */
909 for (i = 0; i < gcm_supp_impl_cnt; i++) {
910 if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
911 impl = i;
912 err = 0;
913 break;
914 }
915 }
916 }
917 #ifdef CAN_USE_GCM_ASM
918 /*
919 * Use the avx implementation if available and the requested one is
920 * avx or fastest.
921 */
922 #if CAN_USE_GCM_ASM >= 2
923 if (gcm_avx2_will_work() == B_TRUE &&
924 (impl == IMPL_AVX2 || impl == IMPL_FASTEST)) {
925 gcm_use_impl(GCM_IMPL_AVX2);
926 } else
927 #endif
928 if (gcm_avx_will_work() == B_TRUE &&
929 (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
930 gcm_use_impl(GCM_IMPL_AVX);
931 } else {
932 gcm_use_impl(GCM_IMPL_GENERIC);
933 }
934 #endif
935
936 if (err == 0) {
937 if (gcm_impl_initialized)
938 atomic_swap_32(&icp_gcm_impl, impl);
939 else
940 atomic_swap_32(&user_sel_impl, impl);
941 }
942
943 return (err);
944 }
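/*
 * On Linux the selection is exposed as a module parameter, so a setting
 * such as the following (illustrative path) goes through gcm_impl_set():
 *
 *	echo fastest > /sys/module/icp/parameters/icp_gcm_impl
 */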
945
946 #if defined(_KERNEL) && defined(__linux__)
947
948 static int
949 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
950 {
951 return (gcm_impl_set(val));
952 }
953
954 static int
955 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
956 {
957 int i, cnt = 0;
958 char *fmt;
959 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
960
961 /* list mandatory options */
962 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
963 #ifdef CAN_USE_GCM_ASM
964 /* Ignore avx implementation if it won't work. */
965 #if CAN_USE_GCM_ASM >= 2
966 if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
967 !gcm_avx2_will_work()) {
968 continue;
969 }
970 #endif
971 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
972 continue;
973 }
974 #endif
975 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
976 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
977 gcm_impl_opts[i].name);
978 }
979
980 /* list all supported implementations */
981 for (i = 0; i < gcm_supp_impl_cnt; i++) {
982 fmt = (i == impl) ? "[%s] " : "%s ";
983 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
984 gcm_supp_impl[i]->name);
985 }
986
987 return (cnt);
988 }
989
990 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
991 NULL, 0644);
992 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
993 #endif /* defined(_KERNEL) && defined(__linux__) */
994
995 #ifdef CAN_USE_GCM_ASM
996 #define GCM_BLOCK_LEN 16
997 /*
998 * The openssl asm routines are 6x aggregated and need that many bytes
999 * at minimum.
1000 */
1001 #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
1002 #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
1003 /*
1004 * Ensure the chunk size is reasonable since we are allocating a
1005 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
1006 */
1007 #define GCM_AVX_MAX_CHUNK_SIZE \
1008 (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
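/*
 * Worked out: GCM_AVX_MIN_DECRYPT_BYTES is 96, so the cap above is
 * (131072 / 96) * 96 = 131040 bytes, and the default gcm_avx_chunk_size
 * below is (32768 / 96) * 96 = 32736 bytes; both are multiples of the
 * 6-block aggregation width.
 */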
1009
1010 /* Clear the FPU registers since they hold sensitive internal state. */
1011 #define clear_fpu_regs() clear_fpu_regs_avx()
1012
1013 #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
1014
1015 /* Get the chunk size module parameter. */
1016 #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
1017
1018 /*
1019 * Module parameter: number of bytes to process at once while owning the FPU.
1020 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
1021 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
1022 */
1023 static uint32_t gcm_avx_chunk_size =
1024 ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1025
1026 /*
1027 * GCM definitions: uint128_t is copied from include/crypto/modes.h
1028 * Avoiding u128 because it is already defined in kernel sources.
1029 */
1030 typedef struct {
1031 uint64_t hi, lo;
1032 } uint128_t;
1033
1034 extern void ASMABI clear_fpu_regs_avx(void);
1035 extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
1036 extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
1037 const uint32_t pt[4], uint32_t ct[4]);
1038
1039 extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
1040 #if CAN_USE_GCM_ASM >= 2
1041 extern void ASMABI gcm_init_vpclmulqdq_avx2(uint128_t Htable[16],
1042 const uint64_t H[2]);
1043 #endif
1044 extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
1045 const uint8_t *in, size_t len);
1046 #if CAN_USE_GCM_ASM >= 2
1047 extern void ASMABI gcm_ghash_vpclmulqdq_avx2(uint64_t ghash[2],
1048 const uint64_t *Htable, const uint8_t *in, size_t len);
1049 #endif
1050 static inline void GHASH_AVX(gcm_ctx_t *ctx, const uint8_t *in, size_t len)
1051 {
1052 switch (ctx->impl) {
1053 #if CAN_USE_GCM_ASM >= 2
1054 case GCM_IMPL_AVX2:
1055 gcm_ghash_vpclmulqdq_avx2(ctx->gcm_ghash,
1056 (const uint64_t *)ctx->gcm_Htable, in, len);
1057 break;
1058 #endif
1059
1060 case GCM_IMPL_AVX:
1061 gcm_ghash_avx(ctx->gcm_ghash,
1062 (const uint64_t *)ctx->gcm_Htable, in, len);
1063 break;
1064
1065 default:
1066 VERIFY(B_FALSE);
1067 }
1068 }
1069
1070 typedef size_t ASMABI aesni_gcm_encrypt_impl(const uint8_t *, uint8_t *,
1071 size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
1072 extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
1073 const void *, uint64_t *, uint64_t *);
1074 #if CAN_USE_GCM_ASM >= 2
1075 extern void ASMABI aes_gcm_enc_update_vaes_avx2(const uint8_t *in,
1076 uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
1077 const uint128_t Htable[16], uint8_t Xi[16]);
1078 #endif
1079
1080 typedef size_t ASMABI aesni_gcm_decrypt_impl(const uint8_t *, uint8_t *,
1081 size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
1082 extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
1083 const void *, uint64_t *, uint64_t *);
1084 #if CAN_USE_GCM_ASM >= 2
1085 extern void ASMABI aes_gcm_dec_update_vaes_avx2(const uint8_t *in,
1086 uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
1087 const uint128_t Htable[16], uint8_t Xi[16]);
1088 #endif
1089
1090 static inline boolean_t
1091 gcm_avx2_will_work(void)
1092 {
1093 return (kfpu_allowed() &&
1094 zfs_avx2_available() && zfs_vaes_available() &&
1095 zfs_vpclmulqdq_available());
1096 }
1097
1098 static inline boolean_t
1099 gcm_avx_will_work(void)
1100 {
1101 /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
1102 return (kfpu_allowed() &&
1103 zfs_avx_available() && zfs_aes_available() &&
1104 zfs_pclmulqdq_available());
1105 }
1106
1107 static inline void
1108 gcm_use_impl(gcm_impl impl)
1109 {
1110 switch (impl) {
1111 #if CAN_USE_GCM_ASM >= 2
1112 case GCM_IMPL_AVX2:
1113 if (gcm_avx2_will_work() == B_TRUE) {
1114 atomic_swap_32(&gcm_impl_used, impl);
1115 return;
1116 }
1117
1118 zfs_fallthrough;
1119 #endif
1120
1121 case GCM_IMPL_AVX:
1122 if (gcm_avx_will_work() == B_TRUE) {
1123 atomic_swap_32(&gcm_impl_used, impl);
1124 return;
1125 }
1126
1127 zfs_fallthrough;
1128
1129 default:
1130 atomic_swap_32(&gcm_impl_used, GCM_IMPL_GENERIC);
1131 }
1132 }
1133
1134 static inline boolean_t
1135 gcm_impl_will_work(gcm_impl impl)
1136 {
1137 switch (impl) {
1138 #if CAN_USE_GCM_ASM >= 2
1139 case GCM_IMPL_AVX2:
1140 return (gcm_avx2_will_work());
1141 #endif
1142
1143 case GCM_IMPL_AVX:
1144 return (gcm_avx_will_work());
1145
1146 default:
1147 return (B_TRUE);
1148 }
1149 }
1150
1151 static inline gcm_impl
1152 gcm_toggle_impl(void)
1153 {
1154 gcm_impl current_impl, new_impl;
1155 do { /* handle races */
1156 current_impl = atomic_load_32(&gcm_impl_used);
1157 new_impl = current_impl;
1158 while (B_TRUE) { /* handle incompatible implementations */
1159 new_impl = (new_impl + 1) % GCM_IMPL_MAX;
1160 if (gcm_impl_will_work(new_impl)) {
1161 break;
1162 }
1163 }
1164
1165 } while (atomic_cas_32(&gcm_impl_used, current_impl, new_impl) !=
1166 current_impl);
1167
1168 return (new_impl);
1169 }
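/*
 * The loop above advances gcm_impl_used to the next implementation that
 * will work on this machine and publishes it with a compare-and-swap, so
 * concurrent callers of gcm_toggle_impl() each observe a well-defined
 * value even when they race.
 */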
1170
1171
1172 /* Increment the GCM counter block by n. */
1173 static inline void
1174 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
1175 {
1176 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
1177 uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
1178
1179 counter = htonll(counter + n);
1180 counter &= counter_mask;
1181 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
1182 }
1183
1184 static size_t aesni_gcm_encrypt_avx(const uint8_t *in, uint8_t *out,
1185 size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1186 uint64_t *Xip)
1187 {
1188 (void) Htable;
1189 return (aesni_gcm_encrypt(in, out, len, key, iv, Xip));
1190 }
1191
1192 #if CAN_USE_GCM_ASM >= 2
1193 // kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
1194 // bits of a |size_t|.
1195 // This is from boringssl/crypto/fipsmodule/aes/gcm.cc.inc
1196 static const size_t kSizeTWithoutLower4Bits = (size_t)-16;
1197
1198 /* The following CRYPTO methods are from boringssl/crypto/internal.h */
1199 static inline uint32_t CRYPTO_bswap4(uint32_t x) {
1200 return (__builtin_bswap32(x));
1201 }
1202
1203 static inline uint32_t CRYPTO_load_u32_be(const void *in) {
1204 uint32_t v;
1205 memcpy(&v, in, sizeof (v));
1206 return (CRYPTO_bswap4(v));
1207 }
1208
1209 static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
1210 v = CRYPTO_bswap4(v);
1211 memcpy(out, &v, sizeof (v));
1212 }
1213
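/*
 * Adapt BoringSSL's VAES/AVX2 routine to the aesni_gcm_encrypt_impl
 * signature: the routine only consumes whole 16-byte blocks and takes the
 * counter block as const, so the length is masked down here and the 32-bit
 * big-endian counter in ivec[12..15] is advanced by the number of blocks
 * processed.
 */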
1214 static size_t aesni_gcm_encrypt_avx2(const uint8_t *in, uint8_t *out,
1215 size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1216 uint64_t *Xip)
1217 {
1218 uint8_t *ivec = (uint8_t *)iv;
1219 len &= kSizeTWithoutLower4Bits;
1220 aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec,
1221 (const uint128_t *)Htable, (uint8_t *)Xip);
1222 CRYPTO_store_u32_be(&ivec[12],
1223 CRYPTO_load_u32_be(&ivec[12]) + len / 16);
1224 return (len);
1225 }
1226 #endif /* if CAN_USE_GCM_ASM >= 2 */
1227
1228 /*
1229 * Encrypt multiple blocks of data in GCM mode.
1230 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
1231 * if possible. While processing a chunk the FPU is "locked".
1232 */
1233 static int
1234 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
1235 size_t length, crypto_data_t *out, size_t block_size)
1236 {
1237 size_t bleft = length;
1238 size_t need = 0;
1239 size_t done = 0;
1240 uint8_t *datap = (uint8_t *)data;
1241 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1242 aesni_gcm_encrypt_impl *encrypt_blocks =
1243 #if CAN_USE_GCM_ASM >= 2
1244 ctx->impl == GCM_IMPL_AVX2 ?
1245 aesni_gcm_encrypt_avx2 :
1246 #endif
1247 aesni_gcm_encrypt_avx;
1248 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1249 uint64_t *ghash = ctx->gcm_ghash;
1250 uint64_t *htable = ctx->gcm_Htable;
1251 uint64_t *cb = ctx->gcm_cb;
1252 uint8_t *ct_buf = NULL;
1253 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1254 int rv = CRYPTO_SUCCESS;
1255
1256 ASSERT(block_size == GCM_BLOCK_LEN);
1257 ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
1258 B_FALSE);
1259 /*
1260 * If the last call left an incomplete block, try to fill
1261 * it first.
1262 */
1263 if (ctx->gcm_remainder_len > 0) {
1264 need = block_size - ctx->gcm_remainder_len;
1265 if (length < need) {
1266 /* Accumulate bytes here and return. */
1267 memcpy((uint8_t *)ctx->gcm_remainder +
1268 ctx->gcm_remainder_len, datap, length);
1269
1270 ctx->gcm_remainder_len += length;
1271 if (ctx->gcm_copy_to == NULL) {
1272 ctx->gcm_copy_to = datap;
1273 }
1274 return (CRYPTO_SUCCESS);
1275 } else {
1276 /* Complete incomplete block. */
1277 memcpy((uint8_t *)ctx->gcm_remainder +
1278 ctx->gcm_remainder_len, datap, need);
1279
1280 ctx->gcm_copy_to = NULL;
1281 }
1282 }
1283
1284 /* Allocate a buffer to encrypt to if there is enough input. */
1285 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1286 ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
1287 if (ct_buf == NULL) {
1288 return (CRYPTO_HOST_MEMORY);
1289 }
1290 }
1291
1292 /* If we completed an incomplete block, encrypt and write it out. */
1293 if (ctx->gcm_remainder_len > 0) {
1294 kfpu_begin();
1295 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1296 (const uint32_t *)cb, (uint32_t *)tmp);
1297
1298 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
1299 GHASH_AVX(ctx, tmp, block_size);
1300 clear_fpu_regs();
1301 kfpu_end();
1302 rv = crypto_put_output_data(tmp, out, block_size);
1303 out->cd_offset += block_size;
1304 gcm_incr_counter_block(ctx);
1305 ctx->gcm_processed_data_len += block_size;
1306 bleft -= need;
1307 datap += need;
1308 ctx->gcm_remainder_len = 0;
1309 }
1310
1311 /* Do the bulk encryption in chunk_size blocks. */
1312 for (; bleft >= chunk_size; bleft -= chunk_size) {
1313 kfpu_begin();
1314 done = encrypt_blocks(
1315 datap, ct_buf, chunk_size, key, cb, htable, ghash);
1316
1317 clear_fpu_regs();
1318 kfpu_end();
1319 if (done != chunk_size) {
1320 rv = CRYPTO_FAILED;
1321 goto out_nofpu;
1322 }
1323 rv = crypto_put_output_data(ct_buf, out, chunk_size);
1324 if (rv != CRYPTO_SUCCESS) {
1325 goto out_nofpu;
1326 }
1327 out->cd_offset += chunk_size;
1328 datap += chunk_size;
1329 ctx->gcm_processed_data_len += chunk_size;
1330 }
1331 /* Check if we are already done. */
1332 if (bleft == 0) {
1333 goto out_nofpu;
1334 }
1335 /* Bulk encrypt the remaining data. */
1336 kfpu_begin();
1337 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1338 done = encrypt_blocks(datap, ct_buf, bleft, key, cb, htable,
1339 ghash);
1340 if (done == 0) {
1341 rv = CRYPTO_FAILED;
1342 goto out;
1343 }
1344 rv = crypto_put_output_data(ct_buf, out, done);
1345 if (rv != CRYPTO_SUCCESS) {
1346 goto out;
1347 }
1348 out->cd_offset += done;
1349 ctx->gcm_processed_data_len += done;
1350 datap += done;
1351 bleft -= done;
1352
1353 }
1354 /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
1355 while (bleft > 0) {
1356 if (bleft < block_size) {
1357 memcpy(ctx->gcm_remainder, datap, bleft);
1358 ctx->gcm_remainder_len = bleft;
1359 ctx->gcm_copy_to = datap;
1360 goto out;
1361 }
1362 /* Encrypt, hash and write out. */
1363 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1364 (const uint32_t *)cb, (uint32_t *)tmp);
1365
1366 gcm_xor_avx(datap, tmp);
1367 GHASH_AVX(ctx, tmp, block_size);
1368 rv = crypto_put_output_data(tmp, out, block_size);
1369 if (rv != CRYPTO_SUCCESS) {
1370 goto out;
1371 }
1372 out->cd_offset += block_size;
1373 gcm_incr_counter_block(ctx);
1374 ctx->gcm_processed_data_len += block_size;
1375 datap += block_size;
1376 bleft -= block_size;
1377 }
1378 out:
1379 clear_fpu_regs();
1380 kfpu_end();
1381 out_nofpu:
1382 if (ct_buf != NULL) {
1383 vmem_free(ct_buf, chunk_size);
1384 }
1385 return (rv);
1386 }
1387
1388 /*
1389 * Finalize the encryption: Zero fill, encrypt, hash and write out any
1390 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
1391 */
1392 static int
1393 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1394 {
1395 uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
1396 uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
1397 uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
1398 size_t rem_len = ctx->gcm_remainder_len;
1399 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1400 int aes_rounds = ((aes_key_t *)keysched)->nr;
1401 int rv;
1402
1403 ASSERT(block_size == GCM_BLOCK_LEN);
1404 ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
1405 B_FALSE);
1406
1407 if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
1408 return (CRYPTO_DATA_LEN_RANGE);
1409 }
1410
1411 kfpu_begin();
1412 /* Pad last incomplete block with zeros, encrypt and hash. */
1413 if (rem_len > 0) {
1414 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1415 const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1416
1417 aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
1418 memset(remainder + rem_len, 0, block_size - rem_len);
1419 for (int i = 0; i < rem_len; i++) {
1420 remainder[i] ^= tmp[i];
1421 }
1422 GHASH_AVX(ctx, remainder, block_size);
1423 ctx->gcm_processed_data_len += rem_len;
1424 /* No need to increment counter_block, it's the last block. */
1425 }
1426 /* Finish tag. */
1427 ctx->gcm_len_a_len_c[1] =
1428 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
1429 GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
1430 aes_encrypt_intel(keysched, aes_rounds, J0, J0);
1431
1432 gcm_xor_avx((uint8_t *)J0, ghash);
1433 clear_fpu_regs();
1434 kfpu_end();
1435
1436 /* Output remainder. */
1437 if (rem_len > 0) {
1438 rv = crypto_put_output_data(remainder, out, rem_len);
1439 if (rv != CRYPTO_SUCCESS)
1440 return (rv);
1441 }
1442 out->cd_offset += rem_len;
1443 ctx->gcm_remainder_len = 0;
1444 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
1445 if (rv != CRYPTO_SUCCESS)
1446 return (rv);
1447
1448 out->cd_offset += ctx->gcm_tag_len;
1449 return (CRYPTO_SUCCESS);
1450 }
1451
1452 static size_t aesni_gcm_decrypt_avx(const uint8_t *in, uint8_t *out,
1453 size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1454 uint64_t *Xip)
1455 {
1456 (void) Htable;
1457 return (aesni_gcm_decrypt(in, out, len, key, iv, Xip));
1458 }
1459
1460 #if CAN_USE_GCM_ASM >= 2
1461 static size_t aesni_gcm_decrypt_avx2(const uint8_t *in, uint8_t *out,
1462 size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1463 uint64_t *Xip)
1464 {
1465 uint8_t *ivec = (uint8_t *)iv;
1466 len &= kSizeTWithoutLower4Bits;
1467 aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec,
1468 (const uint128_t *)Htable, (uint8_t *)Xip);
1469 CRYPTO_store_u32_be(&ivec[12],
1470 CRYPTO_load_u32_be(&ivec[12]) + len / 16);
1471 return (len);
1472 }
1473 #endif /* if CAN_USE_GCM_ASM >= 2 */
1474
1475 /*
1476 * Finalize decryption: so far we have only accumulated ciphertext, so
1477 * now we decrypt it here in place.
1478 */
1479 static int
1480 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1481 {
1482 ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
1483 ASSERT3U(block_size, ==, 16);
1484 ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
1485 B_FALSE);
1486
1487 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1488 aesni_gcm_decrypt_impl *decrypt_blocks =
1489 #if CAN_USE_GCM_ASM >= 2
1490 ctx->impl == GCM_IMPL_AVX2 ?
1491 aesni_gcm_decrypt_avx2 :
1492 #endif
1493 aesni_gcm_decrypt_avx;
1494 size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
1495 uint8_t *datap = ctx->gcm_pt_buf;
1496 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1497 uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1498 uint64_t *htable = ctx->gcm_Htable;
1499 uint64_t *ghash = ctx->gcm_ghash;
1500 uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
1501 int rv = CRYPTO_SUCCESS;
1502 size_t bleft, done;
1503
1504 /*
1505 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
1506 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
1507 * GCM_AVX_MIN_DECRYPT_BYTES.
1508 */
1509 for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
1510 kfpu_begin();
1511 done = decrypt_blocks(datap, datap, chunk_size,
1512 (const void *)key, ctx->gcm_cb, htable, ghash);
1513 clear_fpu_regs();
1514 kfpu_end();
1515 if (done != chunk_size) {
1516 return (CRYPTO_FAILED);
1517 }
1518 datap += done;
1519 }
1520 /* Decrypt remainder, which is less than chunk size, in one go. */
1521 kfpu_begin();
1522 if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
1523 done = decrypt_blocks(datap, datap, bleft,
1524 (const void *)key, ctx->gcm_cb, htable, ghash);
1525 if (done == 0) {
1526 clear_fpu_regs();
1527 kfpu_end();
1528 return (CRYPTO_FAILED);
1529 }
1530 datap += done;
1531 bleft -= done;
1532 }
1533 ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
1534
1535 /*
1536 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
1537 * decrypt them block by block.
1538 */
1539 while (bleft > 0) {
1540 /* Incomplete last block. */
1541 if (bleft < block_size) {
1542 uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
1543
1544 memset(lastb, 0, block_size);
1545 memcpy(lastb, datap, bleft);
1546 /* The GCM processing. */
1547 GHASH_AVX(ctx, lastb, block_size);
1548 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1549 for (size_t i = 0; i < bleft; i++) {
1550 datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
1551 }
1552 break;
1553 }
1554 /* The GCM processing. */
1555 GHASH_AVX(ctx, datap, block_size);
1556 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1557 gcm_xor_avx((uint8_t *)tmp, datap);
1558 gcm_incr_counter_block(ctx);
1559
1560 datap += block_size;
1561 bleft -= block_size;
1562 }
1563 if (rv != CRYPTO_SUCCESS) {
1564 clear_fpu_regs();
1565 kfpu_end();
1566 return (rv);
1567 }
1568 /* Decryption done, finish the tag. */
1569 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
1570 GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
1571 aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
1572 (uint32_t *)ctx->gcm_J0);
1573
1574 gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
1575
1576 /* We are done with the FPU, restore its state. */
1577 clear_fpu_regs();
1578 kfpu_end();
1579
1580 /* Compare the input authentication tag with what we calculated. */
1581 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
1582 /* They don't match. */
1583 return (CRYPTO_INVALID_MAC);
1584 }
1585 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
1586 if (rv != CRYPTO_SUCCESS) {
1587 return (rv);
1588 }
1589 out->cd_offset += pt_len;
1590 return (CRYPTO_SUCCESS);
1591 }
1592
1593 /*
1594 * Initialize the GCM params H, Htable and the counter block. Save the
1595 * initial counter block.
1596 */
1597 static int
1598 gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
1599 const uint8_t *auth_data, size_t auth_data_len, size_t block_size)
1600 {
1601 uint8_t *cb = (uint8_t *)ctx->gcm_cb;
1602 uint64_t *H = ctx->gcm_H;
1603 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1604 int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
1605 const uint8_t *datap = auth_data;
1606 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1607 size_t bleft;
1608
1609 ASSERT(block_size == GCM_BLOCK_LEN);
1610 ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
1611 B_FALSE);
1612
1613 size_t htab_len = 0;
1614 #if CAN_USE_GCM_ASM >= 2
1615 if (ctx->impl == GCM_IMPL_AVX2) {
1616 /*
1617 * BoringSSL's API specifies uint128_t[16] for htab; but only
1618 * uint128_t[12] are used.
1619 * See https://github.com/google/boringssl/blob/
1620 * 813840dd094f9e9c1b00a7368aa25e656554221f1/crypto/fipsmodule/
1621 * modes/asm/aes-gcm-avx2-x86_64.pl#L198-L200
1622 */
1623 htab_len = (2 * 8 * sizeof (uint128_t));
1624 } else
1625 #endif /* CAN_USE_GCM_ASM >= 2 */
1626 {
1627 htab_len = (2 * 6 * sizeof (uint128_t));
1628 }
1629
1630 ctx->gcm_Htable = kmem_alloc(htab_len, KM_SLEEP);
1631 if (ctx->gcm_Htable == NULL) {
1632 return (CRYPTO_HOST_MEMORY);
1633 }
1634
1635 /* Init H (encrypt zero block) and create the initial counter block. */
1636 memset(H, 0, sizeof (ctx->gcm_H));
1637 kfpu_begin();
1638 aes_encrypt_intel(keysched, aes_rounds,
1639 (const uint32_t *)H, (uint32_t *)H);
1640
1641 #if CAN_USE_GCM_ASM >= 2
1642 if (ctx->impl == GCM_IMPL_AVX2) {
1643 gcm_init_vpclmulqdq_avx2((uint128_t *)ctx->gcm_Htable, H);
1644 } else
1645 #endif /* if CAN_USE_GCM_ASM >= 2 */
1646 {
1647 gcm_init_htab_avx(ctx->gcm_Htable, H);
1648 }
1649
1650 if (iv_len == 12) {
1651 memcpy(cb, iv, 12);
1652 cb[12] = 0;
1653 cb[13] = 0;
1654 cb[14] = 0;
1655 cb[15] = 1;
1656 /* We need the ICB later. */
1657 memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
1658 } else {
1659 /*
1660 * Most consumers use 12 byte IVs, so it's OK to use the
1661 * original routines for other IV sizes, just avoid nesting
1662 * kfpu_begin calls.
1663 */
1664 clear_fpu_regs();
1665 kfpu_end();
1666 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
1667 aes_copy_block, aes_xor_block);
1668 kfpu_begin();
1669 }
1670
1671 memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
1672
1673 /* OpenSSL post-increments the counter, adjust for that. */
1674 gcm_incr_counter_block(ctx);
1675
1676 /* Ghash AAD in chunk_size blocks. */
1677 for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
1678 GHASH_AVX(ctx, datap, chunk_size);
1679 datap += chunk_size;
1680 clear_fpu_regs();
1681 kfpu_end();
1682 kfpu_begin();
1683 }
1684 /* Ghash the remainder and handle a possible incomplete GCM block. */
1685 if (bleft > 0) {
1686 size_t incomp = bleft % block_size;
1687
1688 bleft -= incomp;
1689 if (bleft > 0) {
1690 GHASH_AVX(ctx, datap, bleft);
1691 datap += bleft;
1692 }
1693 if (incomp > 0) {
1694 /* Zero pad and hash incomplete last block. */
1695 uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
1696
1697 memset(authp, 0, block_size);
1698 memcpy(authp, datap, incomp);
1699 GHASH_AVX(ctx, authp, block_size);
1700 }
1701 }
1702 clear_fpu_regs();
1703 kfpu_end();
1704 return (CRYPTO_SUCCESS);
1705 }
1706
1707 #if defined(_KERNEL)
1708 static int
1709 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
1710 {
1711 unsigned long val;
1712 char val_rounded[16];
1713 int error = 0;
1714
1715 error = kstrtoul(buf, 0, &val);
1716 if (error)
1717 return (error);
1718
1719 val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1720
1721 if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
1722 return (-EINVAL);
1723
1724 snprintf(val_rounded, 16, "%u", (uint32_t)val);
1725 error = param_set_uint(val_rounded, kp);
1726 return (error);
1727 }
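/*
 * Like icp_gcm_impl, this is surfaced as a module parameter; a value
 * written there (illustrative path) is rounded and range-checked by the
 * handler above:
 *
 *	echo 65472 > /sys/module/icp/parameters/icp_gcm_avx_chunk_size
 */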
1728
1729 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
1730 param_get_uint, &gcm_avx_chunk_size, 0644);
1731
1732 MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
1733 "How many bytes to process while owning the FPU");
1734
1735 #endif /* defined(_KERNEL) */
1736 #endif /* ifdef CAN_USE_GCM_ASM */
1737