1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3
4 #include <linux/init.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/device.h>
9 #include <linux/iommu.h>
10 #include <uapi/linux/idxd.h>
11 #include <linux/highmem.h>
12 #include <linux/sched/smt.h>
13 #include <crypto/internal/acompress.h>
14
15 #include "idxd.h"
16 #include "iaa_crypto.h"
17 #include "iaa_crypto_stats.h"
18
19 #ifdef pr_fmt
20 #undef pr_fmt
21 #endif
22
23 #define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24
25 #define IAA_ALG_PRIORITY 300
26
27 /* number of iaa instances probed */
28 static unsigned int nr_iaa;
29 static unsigned int nr_cpus;
30 static unsigned int nr_nodes;
31 static unsigned int nr_cpus_per_node;
32
33 /* Number of physical cpus sharing each iaa instance */
34 static unsigned int cpus_per_iaa;
35
36 static struct crypto_comp *deflate_generic_tfm;
37
38 /* Per-cpu lookup table for balanced wqs */
39 static struct wq_table_entry __percpu *wq_table;
40
41 static struct idxd_wq *wq_table_next_wq(int cpu)
42 {
43 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44
45 if (++entry->cur_wq >= entry->n_wqs)
46 entry->cur_wq = 0;
47
48 if (!entry->wqs[entry->cur_wq])
49 return NULL;
50
51 pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 entry->wqs[entry->cur_wq]->id, cpu);
54
55 return entry->wqs[entry->cur_wq];
56 }
57
58 static void wq_table_add(int cpu, struct idxd_wq *wq)
59 {
60 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61
62 if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 return;
64
65 entry->wqs[entry->n_wqs++] = wq;
66
67 pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70 }
71
72 static void wq_table_free_entry(int cpu)
73 {
74 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75
76 kfree(entry->wqs);
77 memset(entry, 0, sizeof(*entry));
78 }
79
80 static void wq_table_clear_entry(int cpu)
81 {
82 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83
84 entry->n_wqs = 0;
85 entry->cur_wq = 0;
86 memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87 }
88
89 LIST_HEAD(iaa_devices);
90 DEFINE_MUTEX(iaa_devices_lock);
91
92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93 static bool iaa_crypto_enabled;
94 static bool iaa_crypto_registered;
95
96 /* Whether to verify the results of IAA compress ops */
97 static bool iaa_verify_compress = true;
98
99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100 {
101 return sprintf(buf, "%d\n", iaa_verify_compress);
102 }
103
104 static ssize_t verify_compress_store(struct device_driver *driver,
105 const char *buf, size_t count)
106 {
107 int ret = -EBUSY;
108
109 mutex_lock(&iaa_devices_lock);
110
111 if (iaa_crypto_enabled)
112 goto out;
113
114 ret = kstrtobool(buf, &iaa_verify_compress);
115 if (ret)
116 goto out;
117
118 ret = count;
119 out:
120 mutex_unlock(&iaa_devices_lock);
121
122 return ret;
123 }
124 static DRIVER_ATTR_RW(verify_compress);
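/*
 * Usage sketch (sysfs path assumed from the usual dsa bus layout, adjust
 * as needed): verification can typically be disabled from userspace
 * before any wqs are bound to this driver, e.g.
 *
 *	echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *
 * Once iaa_crypto_enabled is set, the store above returns -EBUSY.
 */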
125
126 /*
127 * The iaa crypto driver supports three 'sync' methods determining how
128 * compressions and decompressions are performed:
129 *
130 * - sync: the compression or decompression completes before
131 * returning. This is the mode used by the async crypto
132 * interface when the sync mode is set to 'sync' and by
133 * the sync crypto interface regardless of setting.
134 *
135 * - async: the compression or decompression is submitted and returns
136 * immediately. Completion interrupts are not used so
137 * the caller is responsible for polling the descriptor
138 * for completion. This mode is applicable to only the
139 * async crypto interface and is ignored for anything
140 * else.
141 *
142 * - async_irq: the compression or decompression is submitted and
143 * returns immediately. Completion interrupts are
144 * enabled so the caller can wait for the completion and
145 * yield to other threads. When the compression or
146 * decompression completes, the completion is signaled
147 * and the caller awakened. This mode is applicable to
148 * only the async crypto interface and is ignored for
149 * anything else.
150 *
151 * These modes can be set using the iaa_crypto sync_mode driver
152 * attribute.
153 */
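/*
 * Usage sketch (same assumed sysfs path as above): while no wqs are
 * bound, a different mode can be selected with e.g.
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *
 * sync_mode_store() rejects the write with -EBUSY once
 * iaa_crypto_enabled is set.
 */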
154
155 /* Use async mode */
156 static bool async_mode;
157 /* Use interrupts */
158 static bool use_irq;
159
160 /**
161 * set_iaa_sync_mode - Set IAA sync mode
162 * @name: The name of the sync mode
163 *
164 * Make the IAA sync mode named @name the current sync mode used by
165 * compression/decompression.
166 */
167
168 static int set_iaa_sync_mode(const char *name)
169 {
170 int ret = 0;
171
172 if (sysfs_streq(name, "sync")) {
173 async_mode = false;
174 use_irq = false;
175 } else if (sysfs_streq(name, "async")) {
176 async_mode = true;
177 use_irq = false;
178 } else if (sysfs_streq(name, "async_irq")) {
179 async_mode = true;
180 use_irq = true;
181 } else {
182 ret = -EINVAL;
183 }
184
185 return ret;
186 }
187
188 static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189 {
190 int ret = 0;
191
192 if (!async_mode && !use_irq)
193 ret = sprintf(buf, "%s\n", "sync");
194 else if (async_mode && !use_irq)
195 ret = sprintf(buf, "%s\n", "async");
196 else if (async_mode && use_irq)
197 ret = sprintf(buf, "%s\n", "async_irq");
198
199 return ret;
200 }
201
202 static ssize_t sync_mode_store(struct device_driver *driver,
203 const char *buf, size_t count)
204 {
205 int ret = -EBUSY;
206
207 mutex_lock(&iaa_devices_lock);
208
209 if (iaa_crypto_enabled)
210 goto out;
211
212 ret = set_iaa_sync_mode(buf);
213 if (ret == 0)
214 ret = count;
215 out:
216 mutex_unlock(&iaa_devices_lock);
217
218 return ret;
219 }
220 static DRIVER_ATTR_RW(sync_mode);
221
222 static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223
224 static int find_empty_iaa_compression_mode(void)
225 {
226 int i = -EINVAL;
227
228 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 if (iaa_compression_modes[i])
230 continue;
231 break;
232 }
233
234 return i < IAA_COMP_MODES_MAX ? i : -EINVAL;
235 }
236
237 static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238 {
239 struct iaa_compression_mode *mode;
240 int i;
241
242 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 mode = iaa_compression_modes[i];
244 if (!mode)
245 continue;
246
247 if (!strcmp(mode->name, name)) {
248 *idx = i;
249 return iaa_compression_modes[i];
250 }
251 }
252
253 return NULL;
254 }
255
256 static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257 {
258 kfree(mode->name);
259 kfree(mode->ll_table);
260 kfree(mode->d_table);
261
262 kfree(mode);
263 }
264
265 /*
266 * IAA Compression modes are defined by an ll_table and a d_table.
267 * These tables are typically generated and captured using statistics
268 * collected from running actual compress/decompress workloads.
269 *
270 * A module or other kernel code can add and remove compression modes
271 * with a given name using the exported @add_iaa_compression_mode()
272 * and @remove_iaa_compression_mode functions.
273 *
274 * When a new compression mode is added, the tables are saved in a
275 * global compression mode list. When IAA devices are added, a
276 * per-IAA device dma mapping is created for each IAA device, for each
277 * compression mode. These are the tables used to do the actual
278 * compression/decompression and are unmapped if/when the devices are
279 * removed. Currently, compression modes must be added before any
280 * device is added, and removed after all devices have been removed.
281 */
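/*
 * Hypothetical caller sketch (all names below are illustrative, not part
 * of this driver): a provider with precomputed Huffman tables could
 * register and later remove a mode roughly like this:
 *
 *	ret = add_iaa_compression_mode("my_mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, NULL);
 *	...
 *	remove_iaa_compression_mode("my_mode");
 *
 * subject to the add-before-devices/remove-after-devices ordering noted
 * above.
 */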
282
283 /**
284 * remove_iaa_compression_mode - Remove an IAA compression mode
285 * @name: The name of the compression mode to remove
286 *
287 * Remove the IAA compression mode named @name.
288 */
289 void remove_iaa_compression_mode(const char *name)
290 {
291 struct iaa_compression_mode *mode;
292 int idx;
293
294 mutex_lock(&iaa_devices_lock);
295
296 if (!list_empty(&iaa_devices))
297 goto out;
298
299 mode = find_iaa_compression_mode(name, &idx);
300 if (mode) {
301 free_iaa_compression_mode(mode);
302 iaa_compression_modes[idx] = NULL;
303 }
304 out:
305 mutex_unlock(&iaa_devices_lock);
306 }
307 EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
308
309 /**
310 * add_iaa_compression_mode - Add an IAA compression mode
311 * @name: The name the compression mode will be known as
312 * @ll_table: The ll table
313 * @ll_table_size: The ll table size in bytes
314 * @d_table: The d table
315 * @d_table_size: The d table size in bytes
316 * @init: Optional callback function to init the compression mode data
317 * @free: Optional callback function to free the compression mode data
318 *
319 * Add a new IAA compression mode named @name.
320 *
321 * Returns 0 if successful, errcode otherwise.
322 */
323 int add_iaa_compression_mode(const char *name,
324 const u32 *ll_table,
325 int ll_table_size,
326 const u32 *d_table,
327 int d_table_size,
328 iaa_dev_comp_init_fn_t init,
329 iaa_dev_comp_free_fn_t free)
330 {
331 struct iaa_compression_mode *mode;
332 int idx, ret = -ENOMEM;
333
334 mutex_lock(&iaa_devices_lock);
335
336 if (!list_empty(&iaa_devices)) {
337 ret = -EBUSY;
338 goto out;
339 }
340
341 mode = kzalloc(sizeof(*mode), GFP_KERNEL);
342 if (!mode)
343 goto out;
344
345 mode->name = kstrdup(name, GFP_KERNEL);
346 if (!mode->name)
347 goto free;
348
349 if (ll_table) {
350 mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
351 if (!mode->ll_table)
352 goto free;
353 mode->ll_table_size = ll_table_size;
354 }
355
356 if (d_table) {
357 mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
358 if (!mode->d_table)
359 goto free;
360 mode->d_table_size = d_table_size;
361 }
362
363 mode->init = init;
364 mode->free = free;
365
366 idx = find_empty_iaa_compression_mode();
367 if (idx < 0)
368 goto free;
369
370 pr_debug("IAA compression mode %s added at idx %d\n",
371 mode->name, idx);
372
373 iaa_compression_modes[idx] = mode;
374
375 ret = 0;
376 out:
377 mutex_unlock(&iaa_devices_lock);
378
379 return ret;
380 free:
381 free_iaa_compression_mode(mode);
382 goto out;
383 }
384 EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
385
386 static struct iaa_device_compression_mode *
387 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
388 {
389 return iaa_device->compression_modes[idx];
390 }
391
392 static void free_device_compression_mode(struct iaa_device *iaa_device,
393 struct iaa_device_compression_mode *device_mode)
394 {
395 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
396 struct device *dev = &iaa_device->idxd->pdev->dev;
397
398 kfree(device_mode->name);
399
400 if (device_mode->aecs_comp_table)
401 dma_free_coherent(dev, size, device_mode->aecs_comp_table,
402 device_mode->aecs_comp_table_dma_addr);
403 kfree(device_mode);
404 }
405
406 #define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
407 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
408 #define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
409 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
410 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
411 IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
412 IDXD_OP_FLAG_AECS_RW_TGLS)
413
414 static int check_completion(struct device *dev,
415 struct iax_completion_record *comp,
416 bool compress,
417 bool only_once);
418
419 static int init_device_compression_mode(struct iaa_device *iaa_device,
420 struct iaa_compression_mode *mode,
421 int idx, struct idxd_wq *wq)
422 {
423 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
424 struct device *dev = &iaa_device->idxd->pdev->dev;
425 struct iaa_device_compression_mode *device_mode;
426 int ret = -ENOMEM;
427
428 device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
429 if (!device_mode)
430 return -ENOMEM;
431
432 device_mode->name = kstrdup(mode->name, GFP_KERNEL);
433 if (!device_mode->name)
434 goto free;
435
436 device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
437 &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
438 if (!device_mode->aecs_comp_table)
439 goto free;
440
441 /* Add Huffman table to aecs */
442 memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
443 memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
444 memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
445
446 if (mode->init) {
447 ret = mode->init(device_mode);
448 if (ret)
449 goto free;
450 }
451
452 /* mode index should match iaa_compression_modes idx */
453 iaa_device->compression_modes[idx] = device_mode;
454
455 pr_debug("IAA %s compression mode initialized for iaa device %d\n",
456 mode->name, iaa_device->idxd->id);
457
458 ret = 0;
459 out:
460 return ret;
461 free:
462 pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
463 mode->name, iaa_device->idxd->id);
464
465 free_device_compression_mode(iaa_device, device_mode);
466 goto out;
467 }
468
469 static int init_device_compression_modes(struct iaa_device *iaa_device,
470 struct idxd_wq *wq)
471 {
472 struct iaa_compression_mode *mode;
473 int i, ret = 0;
474
475 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
476 mode = iaa_compression_modes[i];
477 if (!mode)
478 continue;
479
480 ret = init_device_compression_mode(iaa_device, mode, i, wq);
481 if (ret)
482 break;
483 }
484
485 return ret;
486 }
487
488 static void remove_device_compression_modes(struct iaa_device *iaa_device)
489 {
490 struct iaa_device_compression_mode *device_mode;
491 int i;
492
493 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
494 device_mode = iaa_device->compression_modes[i];
495 if (!device_mode)
496 continue;
497
498 if (iaa_compression_modes[i]->free)
499 iaa_compression_modes[i]->free(device_mode);
500 free_device_compression_mode(iaa_device, device_mode);
501 iaa_device->compression_modes[i] = NULL;
502 }
503 }
504
505 static struct iaa_device *iaa_device_alloc(void)
506 {
507 struct iaa_device *iaa_device;
508
509 iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
510 if (!iaa_device)
511 return NULL;
512
513 INIT_LIST_HEAD(&iaa_device->wqs);
514
515 return iaa_device;
516 }
517
518 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
519 {
520 struct iaa_wq *iaa_wq;
521
522 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
523 if (iaa_wq->wq == wq)
524 return true;
525 }
526
527 return false;
528 }
529
530 static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
531 {
532 struct iaa_device *iaa_device;
533
534 iaa_device = iaa_device_alloc();
535 if (!iaa_device)
536 return NULL;
537
538 iaa_device->idxd = idxd;
539
540 list_add_tail(&iaa_device->list, &iaa_devices);
541
542 nr_iaa++;
543
544 return iaa_device;
545 }
546
547 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
548 {
549 int ret = 0;
550
551 ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
552 if (ret)
553 return ret;
554
555 return ret;
556 }
557
558 static void del_iaa_device(struct iaa_device *iaa_device)
559 {
560 list_del(&iaa_device->list);
561
562 nr_iaa--;
563 }
564
565 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
566 struct iaa_wq **new_wq)
567 {
568 struct idxd_device *idxd = iaa_device->idxd;
569 struct pci_dev *pdev = idxd->pdev;
570 struct device *dev = &pdev->dev;
571 struct iaa_wq *iaa_wq;
572
573 iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
574 if (!iaa_wq)
575 return -ENOMEM;
576
577 iaa_wq->wq = wq;
578 iaa_wq->iaa_device = iaa_device;
579 idxd_wq_set_private(wq, iaa_wq);
580
581 list_add_tail(&iaa_wq->list, &iaa_device->wqs);
582
583 iaa_device->n_wq++;
584
585 if (new_wq)
586 *new_wq = iaa_wq;
587
588 dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
589 wq->id, iaa_device->idxd->id, iaa_device->n_wq);
590
591 return 0;
592 }
593
594 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
595 {
596 struct idxd_device *idxd = iaa_device->idxd;
597 struct pci_dev *pdev = idxd->pdev;
598 struct device *dev = &pdev->dev;
599 struct iaa_wq *iaa_wq;
600
601 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
602 if (iaa_wq->wq == wq) {
603 list_del(&iaa_wq->list);
604 iaa_device->n_wq--;
605
606 dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
607 wq->id, iaa_device->idxd->id,
608 iaa_device->n_wq, nr_iaa);
609
610 if (iaa_device->n_wq == 0)
611 del_iaa_device(iaa_device);
612 break;
613 }
614 }
615 }
616
617 static void clear_wq_table(void)
618 {
619 int cpu;
620
621 for (cpu = 0; cpu < nr_cpus; cpu++)
622 wq_table_clear_entry(cpu);
623
624 pr_debug("cleared wq table\n");
625 }
626
627 static void free_iaa_device(struct iaa_device *iaa_device)
628 {
629 if (!iaa_device)
630 return;
631
632 remove_device_compression_modes(iaa_device);
633 kfree(iaa_device);
634 }
635
636 static void __free_iaa_wq(struct iaa_wq *iaa_wq)
637 {
638 struct iaa_device *iaa_device;
639
640 if (!iaa_wq)
641 return;
642
643 iaa_device = iaa_wq->iaa_device;
644 if (iaa_device->n_wq == 0)
645 free_iaa_device(iaa_wq->iaa_device);
646 }
647
648 static void free_iaa_wq(struct iaa_wq *iaa_wq)
649 {
650 struct idxd_wq *wq;
651
652 __free_iaa_wq(iaa_wq);
653
654 wq = iaa_wq->wq;
655
656 kfree(iaa_wq);
657 idxd_wq_set_private(wq, NULL);
658 }
659
660 static int iaa_wq_get(struct idxd_wq *wq)
661 {
662 struct idxd_device *idxd = wq->idxd;
663 struct iaa_wq *iaa_wq;
664 int ret = 0;
665
666 spin_lock(&idxd->dev_lock);
667 iaa_wq = idxd_wq_get_private(wq);
668 if (iaa_wq && !iaa_wq->remove) {
669 iaa_wq->ref++;
670 idxd_wq_get(wq);
671 } else {
672 ret = -ENODEV;
673 }
674 spin_unlock(&idxd->dev_lock);
675
676 return ret;
677 }
678
679 static int iaa_wq_put(struct idxd_wq *wq)
680 {
681 struct idxd_device *idxd = wq->idxd;
682 struct iaa_wq *iaa_wq;
683 bool free = false;
684 int ret = 0;
685
686 spin_lock(&idxd->dev_lock);
687 iaa_wq = idxd_wq_get_private(wq);
688 if (iaa_wq) {
689 iaa_wq->ref--;
690 if (iaa_wq->ref == 0 && iaa_wq->remove) {
691 idxd_wq_set_private(wq, NULL);
692 free = true;
693 }
694 idxd_wq_put(wq);
695 } else {
696 ret = -ENODEV;
697 }
698 spin_unlock(&idxd->dev_lock);
699 if (free) {
700 __free_iaa_wq(iaa_wq);
701 kfree(iaa_wq);
702 }
703
704 return ret;
705 }
706
707 static void free_wq_table(void)
708 {
709 int cpu;
710
711 for (cpu = 0; cpu < nr_cpus; cpu++)
712 wq_table_free_entry(cpu);
713
714 free_percpu(wq_table);
715
716 pr_debug("freed wq table\n");
717 }
718
719 static int alloc_wq_table(int max_wqs)
720 {
721 struct wq_table_entry *entry;
722 int cpu;
723
724 wq_table = alloc_percpu(struct wq_table_entry);
725 if (!wq_table)
726 return -ENOMEM;
727
728 for (cpu = 0; cpu < nr_cpus; cpu++) {
729 entry = per_cpu_ptr(wq_table, cpu);
730 entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
731 if (!entry->wqs) {
732 free_wq_table();
733 return -ENOMEM;
734 }
735
736 entry->max_wqs = max_wqs;
737 }
738
739 pr_debug("initialized wq table\n");
740
741 return 0;
742 }
743
744 static int save_iaa_wq(struct idxd_wq *wq)
745 {
746 struct iaa_device *iaa_device, *found = NULL;
747 struct idxd_device *idxd;
748 struct pci_dev *pdev;
749 struct device *dev;
750 int ret = 0;
751
752 list_for_each_entry(iaa_device, &iaa_devices, list) {
753 if (iaa_device->idxd == wq->idxd) {
754 idxd = iaa_device->idxd;
755 pdev = idxd->pdev;
756 dev = &pdev->dev;
757 /*
758 * Check to see that we don't already have this wq.
759 * Shouldn't happen but we don't control probing.
760 */
761 if (iaa_has_wq(iaa_device, wq)) {
762 dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
763 iaa_device);
764 goto out;
765 }
766
767 found = iaa_device;
768
769 ret = add_iaa_wq(iaa_device, wq, NULL);
770 if (ret)
771 goto out;
772
773 break;
774 }
775 }
776
777 if (!found) {
778 struct iaa_device *new_device;
779 struct iaa_wq *new_wq;
780
781 new_device = add_iaa_device(wq->idxd);
782 if (!new_device) {
783 ret = -ENOMEM;
784 goto out;
785 }
786
787 ret = add_iaa_wq(new_device, wq, &new_wq);
788 if (ret) {
789 del_iaa_device(new_device);
790 free_iaa_device(new_device);
791 goto out;
792 }
793
794 ret = init_iaa_device(new_device, new_wq);
795 if (ret) {
796 del_iaa_wq(new_device, new_wq->wq);
797 del_iaa_device(new_device);
798 free_iaa_wq(new_wq);
799 goto out;
800 }
801 }
802
803 if (WARN_ON(nr_iaa == 0))
804 return -EINVAL;
805
806 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
807 if (!cpus_per_iaa)
808 cpus_per_iaa = 1;
809 out:
810 return ret;
811 }
812
813 static void remove_iaa_wq(struct idxd_wq *wq)
814 {
815 struct iaa_device *iaa_device;
816
817 list_for_each_entry(iaa_device, &iaa_devices, list) {
818 if (iaa_has_wq(iaa_device, wq)) {
819 del_iaa_wq(iaa_device, wq);
820 break;
821 }
822 }
823
824 if (nr_iaa) {
825 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
826 if (!cpus_per_iaa)
827 cpus_per_iaa = 1;
828 } else
829 cpus_per_iaa = 1;
830 }
831
832 static int wq_table_add_wqs(int iaa, int cpu)
833 {
834 struct iaa_device *iaa_device, *found_device = NULL;
835 int ret = 0, cur_iaa = 0, n_wqs_added = 0;
836 struct idxd_device *idxd;
837 struct iaa_wq *iaa_wq;
838 struct pci_dev *pdev;
839 struct device *dev;
840
841 list_for_each_entry(iaa_device, &iaa_devices, list) {
842 idxd = iaa_device->idxd;
843 pdev = idxd->pdev;
844 dev = &pdev->dev;
845
846 if (cur_iaa != iaa) {
847 cur_iaa++;
848 continue;
849 }
850
851 found_device = iaa_device;
852 dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
853 found_device->idxd->id, cur_iaa);
854 break;
855 }
856
857 if (!found_device) {
858 found_device = list_first_entry_or_null(&iaa_devices,
859 struct iaa_device, list);
860 if (!found_device) {
861 pr_debug("couldn't find any iaa devices with wqs!\n");
862 ret = -EINVAL;
863 goto out;
864 }
865 cur_iaa = 0;
866
867 idxd = found_device->idxd;
868 pdev = idxd->pdev;
869 dev = &pdev->dev;
870 dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
871 found_device->idxd->id, cur_iaa);
872 }
873
874 list_for_each_entry(iaa_wq, &found_device->wqs, list) {
875 wq_table_add(cpu, iaa_wq->wq);
876 pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
877 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
878 n_wqs_added++;
879 }
880
881 if (!n_wqs_added) {
882 pr_debug("couldn't find any iaa wqs!\n");
883 ret = -EINVAL;
884 goto out;
885 }
886 out:
887 return ret;
888 }
889
890 /*
891 * Rebalance the wq table so that given a cpu, it's easy to find the
892 * closest IAA instance. The idea is to try to choose the most
893 * appropriate IAA instance for a caller and spread available
894 * workqueues around to clients.
895 */
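/*
 * Worked example (hypothetical topology): with 2 nodes, 56 cpus per node
 * and 4 IAA instances, cpus_per_iaa = (2 * 56) / 4 = 28, so cpus 0-27 of
 * node 0 are given iaa 0, cpus 28-55 iaa 1, and node 1 likewise gets
 * iaa 2 and iaa 3.
 */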
896 static void rebalance_wq_table(void)
897 {
898 const struct cpumask *node_cpus;
899 int node, cpu, iaa = -1;
900
901 if (nr_iaa == 0)
902 return;
903
904 pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
905 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
906
907 clear_wq_table();
908
909 if (nr_iaa == 1) {
910 for (cpu = 0; cpu < nr_cpus; cpu++) {
911 if (WARN_ON(wq_table_add_wqs(0, cpu))) {
912 pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
913 return;
914 }
915 }
916
917 return;
918 }
919
920 for_each_node_with_cpus(node) {
921 node_cpus = cpumask_of_node(node);
922
923 for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
924 int node_cpu = cpumask_nth(cpu, node_cpus);
925
926 if (WARN_ON(node_cpu >= nr_cpu_ids)) {
927 pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
928 return;
929 }
930
931 if ((cpu % cpus_per_iaa) == 0)
932 iaa++;
933
934 if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
935 pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
936 return;
937 }
938 }
939 }
940 }
941
942 static inline int check_completion(struct device *dev,
943 struct iax_completion_record *comp,
944 bool compress,
945 bool only_once)
946 {
947 char *op_str = compress ? "compress" : "decompress";
948 int status_checks = 0;
949 int ret = 0;
950
951 while (!comp->status) {
952 if (only_once)
953 return -EAGAIN;
954 cpu_relax();
955 if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
956 /* Something is wrong with the hw, disable it. */
957 dev_err(dev, "%s completion timed out - "
958 "assuming broken hw, iaa_crypto now DISABLED\n",
959 op_str);
960 iaa_crypto_enabled = false;
961 ret = -ETIMEDOUT;
962 goto out;
963 }
964 }
965
966 if (comp->status != IAX_COMP_SUCCESS) {
967 if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
968 ret = -ETIMEDOUT;
969 dev_dbg(dev, "%s timed out, size=0x%x\n",
970 op_str, comp->output_size);
971 update_completion_timeout_errs();
972 goto out;
973 }
974
975 if (comp->status == IAA_ANALYTICS_ERROR &&
976 comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
977 ret = -E2BIG;
978 dev_dbg(dev, "compressed > uncompressed size,"
979 " not compressing, size=0x%x\n",
980 comp->output_size);
981 update_completion_comp_buf_overflow_errs();
982 goto out;
983 }
984
985 if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
986 ret = -EOVERFLOW;
987 goto out;
988 }
989
990 ret = -EINVAL;
991 dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
992 op_str, comp->status, comp->error_code, comp->output_size);
993 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
994 update_completion_einval_errs();
995
996 goto out;
997 }
998 out:
999 return ret;
1000 }
1001
1002 static int deflate_generic_decompress(struct acomp_req *req)
1003 {
1004 void *src, *dst;
1005 int ret;
1006
1007 src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1008 dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1009
1010 ret = crypto_comp_decompress(deflate_generic_tfm,
1011 src, req->slen, dst, &req->dlen);
1012
1013 kunmap_local(src);
1014 kunmap_local(dst);
1015
1016 update_total_sw_decomp_calls();
1017
1018 return ret;
1019 }
1020
1021 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1022 struct acomp_req *req,
1023 dma_addr_t *src_addr, dma_addr_t *dst_addr);
1024
1025 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1026 struct idxd_wq *wq,
1027 dma_addr_t src_addr, unsigned int slen,
1028 dma_addr_t dst_addr, unsigned int *dlen,
1029 u32 compression_crc);
1030
1031 static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1032 enum idxd_complete_type comp_type,
1033 bool free_desc, void *__ctx,
1034 u32 *status)
1035 {
1036 struct iaa_device_compression_mode *active_compression_mode;
1037 struct iaa_compression_ctx *compression_ctx;
1038 struct crypto_ctx *ctx = __ctx;
1039 struct iaa_device *iaa_device;
1040 struct idxd_device *idxd;
1041 struct iaa_wq *iaa_wq;
1042 struct pci_dev *pdev;
1043 struct device *dev;
1044 int ret, err = 0;
1045
1046 compression_ctx = crypto_tfm_ctx(ctx->tfm);
1047
1048 iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1049 iaa_device = iaa_wq->iaa_device;
1050 idxd = iaa_device->idxd;
1051 pdev = idxd->pdev;
1052 dev = &pdev->dev;
1053
1054 active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1055 compression_ctx->mode);
1056 dev_dbg(dev, "%s: compression mode %s,"
1057 " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1058 active_compression_mode->name,
1059 ctx->src_addr, ctx->dst_addr);
1060
1061 ret = check_completion(dev, idxd_desc->iax_completion,
1062 ctx->compress, false);
1063 if (ret) {
1064 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1065 if (!ctx->compress &&
1066 idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1067 pr_warn("%s: falling back to deflate-generic decompress, "
1068 "analytics error code %x\n", __func__,
1069 idxd_desc->iax_completion->error_code);
1070 ret = deflate_generic_decompress(ctx->req);
1071 if (ret) {
1072 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1073 __func__, ret);
1074 err = -EIO;
1075 goto err;
1076 }
1077 } else {
1078 err = -EIO;
1079 goto err;
1080 }
1081 } else {
1082 ctx->req->dlen = idxd_desc->iax_completion->output_size;
1083 }
1084
1085 /* Update stats */
1086 if (ctx->compress) {
1087 update_total_comp_bytes_out(ctx->req->dlen);
1088 update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1089 } else {
1090 update_total_decomp_bytes_in(ctx->req->slen);
1091 update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
1092 }
1093
1094 if (ctx->compress && compression_ctx->verify_compress) {
1095 dma_addr_t src_addr, dst_addr;
1096 u32 compression_crc;
1097
1098 compression_crc = idxd_desc->iax_completion->crc;
1099
1100 ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1101 if (ret) {
1102 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1103 err = -EIO;
1104 goto out;
1105 }
1106
1107 ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1108 ctx->req->slen, dst_addr, &ctx->req->dlen,
1109 compression_crc);
1110 if (ret) {
1111 dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1112 err = -EIO;
1113 }
1114
1115 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1116 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1117
1118 goto out;
1119 }
1120 err:
1121 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1122 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1123 out:
1124 if (ret != 0)
1125 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1126
1127 if (ctx->req->base.complete)
1128 acomp_request_complete(ctx->req, err);
1129
1130 if (free_desc)
1131 idxd_free_desc(idxd_desc->wq, idxd_desc);
1132 iaa_wq_put(idxd_desc->wq);
1133 }
1134
1135 static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
1136 struct idxd_wq *wq,
1137 dma_addr_t src_addr, unsigned int slen,
1138 dma_addr_t dst_addr, unsigned int *dlen,
1139 u32 *compression_crc,
1140 bool disable_async)
1141 {
1142 struct iaa_device_compression_mode *active_compression_mode;
1143 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1144 struct iaa_device *iaa_device;
1145 struct idxd_desc *idxd_desc;
1146 struct iax_hw_desc *desc;
1147 struct idxd_device *idxd;
1148 struct iaa_wq *iaa_wq;
1149 struct pci_dev *pdev;
1150 struct device *dev;
1151 int ret = 0;
1152
1153 iaa_wq = idxd_wq_get_private(wq);
1154 iaa_device = iaa_wq->iaa_device;
1155 idxd = iaa_device->idxd;
1156 pdev = idxd->pdev;
1157 dev = &pdev->dev;
1158
1159 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1160
1161 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1162 if (IS_ERR(idxd_desc)) {
1163 dev_dbg(dev, "idxd descriptor allocation failed\n");
1164 dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1165 return PTR_ERR(idxd_desc);
1166 }
1167 desc = idxd_desc->iax_hw;
1168
1169 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1170 IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1171 desc->opcode = IAX_OPCODE_COMPRESS;
1172 desc->compr_flags = IAA_COMP_FLAGS;
1173 desc->priv = 0;
1174
1175 desc->src1_addr = (u64)src_addr;
1176 desc->src1_size = slen;
1177 desc->dst_addr = (u64)dst_addr;
1178 desc->max_dst_size = *dlen;
1179 desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1180 desc->src2_size = sizeof(struct aecs_comp_table_record);
1181 desc->completion_addr = idxd_desc->compl_dma;
1182
1183 if (ctx->use_irq && !disable_async) {
1184 desc->flags |= IDXD_OP_FLAG_RCI;
1185
1186 idxd_desc->crypto.req = req;
1187 idxd_desc->crypto.tfm = tfm;
1188 idxd_desc->crypto.src_addr = src_addr;
1189 idxd_desc->crypto.dst_addr = dst_addr;
1190 idxd_desc->crypto.compress = true;
1191
1192 dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1193 " src_addr %llx, dst_addr %llx\n", __func__,
1194 active_compression_mode->name,
1195 src_addr, dst_addr);
1196 } else if (ctx->async_mode && !disable_async)
1197 req->base.data = idxd_desc;
1198
1199 dev_dbg(dev, "%s: compression mode %s,"
1200 " desc->src1_addr %llx, desc->src1_size %d,"
1201 " desc->dst_addr %llx, desc->max_dst_size %d,"
1202 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1203 active_compression_mode->name,
1204 desc->src1_addr, desc->src1_size, desc->dst_addr,
1205 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1206
1207 ret = idxd_submit_desc(wq, idxd_desc);
1208 if (ret) {
1209 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1210 goto err;
1211 }
1212
1213 /* Update stats */
1214 update_total_comp_calls();
1215 update_wq_comp_calls(wq);
1216
1217 if (ctx->async_mode && !disable_async) {
1218 ret = -EINPROGRESS;
1219 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1220 goto out;
1221 }
1222
1223 ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1224 if (ret) {
1225 dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1226 goto err;
1227 }
1228
1229 *dlen = idxd_desc->iax_completion->output_size;
1230
1231 /* Update stats */
1232 update_total_comp_bytes_out(*dlen);
1233 update_wq_comp_bytes(wq, *dlen);
1234
1235 *compression_crc = idxd_desc->iax_completion->crc;
1236
1237 if (!ctx->async_mode || disable_async)
1238 idxd_free_desc(wq, idxd_desc);
1239 out:
1240 return ret;
1241 err:
1242 idxd_free_desc(wq, idxd_desc);
1243 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1244
1245 goto out;
1246 }
1247
1248 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1249 struct acomp_req *req,
1250 dma_addr_t *src_addr, dma_addr_t *dst_addr)
1251 {
1252 int ret = 0;
1253 int nr_sgs;
1254
1255 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1256 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1257
1258 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1259 if (nr_sgs <= 0 || nr_sgs > 1) {
1260 dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1261 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1262 iaa_wq->wq->id, ret);
1263 ret = -EIO;
1264 goto out;
1265 }
1266 *src_addr = sg_dma_address(req->src);
1267 dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1268 " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1269 req->src, req->slen, sg_dma_len(req->src));
1270
1271 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1272 if (nr_sgs <= 0 || nr_sgs > 1) {
1273 dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1274 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1275 iaa_wq->wq->id, ret);
1276 ret = -EIO;
1277 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1278 goto out;
1279 }
1280 *dst_addr = sg_dma_address(req->dst);
1281 dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1282 " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1283 req->dst, req->dlen, sg_dma_len(req->dst));
1284 out:
1285 return ret;
1286 }
1287
1288 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1289 struct idxd_wq *wq,
1290 dma_addr_t src_addr, unsigned int slen,
1291 dma_addr_t dst_addr, unsigned int *dlen,
1292 u32 compression_crc)
1293 {
1294 struct iaa_device_compression_mode *active_compression_mode;
1295 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1296 struct iaa_device *iaa_device;
1297 struct idxd_desc *idxd_desc;
1298 struct iax_hw_desc *desc;
1299 struct idxd_device *idxd;
1300 struct iaa_wq *iaa_wq;
1301 struct pci_dev *pdev;
1302 struct device *dev;
1303 int ret = 0;
1304
1305 iaa_wq = idxd_wq_get_private(wq);
1306 iaa_device = iaa_wq->iaa_device;
1307 idxd = iaa_device->idxd;
1308 pdev = idxd->pdev;
1309 dev = &pdev->dev;
1310
1311 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1312
1313 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1314 if (IS_ERR(idxd_desc)) {
1315 dev_dbg(dev, "idxd descriptor allocation failed\n");
1316 dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1317 PTR_ERR(idxd_desc));
1318 return PTR_ERR(idxd_desc);
1319 }
1320 desc = idxd_desc->iax_hw;
1321
1322 /* Verify (optional) - decompress and check crc, suppress dest write */
1323
1324 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1325 desc->opcode = IAX_OPCODE_DECOMPRESS;
1326 desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1327 desc->priv = 0;
1328
1329 desc->src1_addr = (u64)dst_addr;
1330 desc->src1_size = *dlen;
1331 desc->dst_addr = (u64)src_addr;
1332 desc->max_dst_size = slen;
1333 desc->completion_addr = idxd_desc->compl_dma;
1334
1335 dev_dbg(dev, "(verify) compression mode %s,"
1336 " desc->src1_addr %llx, desc->src1_size %d,"
1337 " desc->dst_addr %llx, desc->max_dst_size %d,"
1338 " desc->src2_addr %llx, desc->src2_size %d\n",
1339 active_compression_mode->name,
1340 desc->src1_addr, desc->src1_size, desc->dst_addr,
1341 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1342
1343 ret = idxd_submit_desc(wq, idxd_desc);
1344 if (ret) {
1345 dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1346 goto err;
1347 }
1348
1349 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1350 if (ret) {
1351 dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1352 goto err;
1353 }
1354
1355 if (compression_crc != idxd_desc->iax_completion->crc) {
1356 ret = -EINVAL;
1357 dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1358 " comp=0x%x, decomp=0x%x\n", compression_crc,
1359 idxd_desc->iax_completion->crc);
1360 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1361 8, 1, idxd_desc->iax_completion, 64, 0);
1362 goto err;
1363 }
1364
1365 idxd_free_desc(wq, idxd_desc);
1366 out:
1367 return ret;
1368 err:
1369 idxd_free_desc(wq, idxd_desc);
1370 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1371
1372 goto out;
1373 }
1374
1375 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1376 struct idxd_wq *wq,
1377 dma_addr_t src_addr, unsigned int slen,
1378 dma_addr_t dst_addr, unsigned int *dlen,
1379 bool disable_async)
1380 {
1381 struct iaa_device_compression_mode *active_compression_mode;
1382 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1383 struct iaa_device *iaa_device;
1384 struct idxd_desc *idxd_desc;
1385 struct iax_hw_desc *desc;
1386 struct idxd_device *idxd;
1387 struct iaa_wq *iaa_wq;
1388 struct pci_dev *pdev;
1389 struct device *dev;
1390 int ret = 0;
1391
1392 iaa_wq = idxd_wq_get_private(wq);
1393 iaa_device = iaa_wq->iaa_device;
1394 idxd = iaa_device->idxd;
1395 pdev = idxd->pdev;
1396 dev = &pdev->dev;
1397
1398 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1399
1400 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1401 if (IS_ERR(idxd_desc)) {
1402 dev_dbg(dev, "idxd descriptor allocation failed\n");
1403 dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1404 PTR_ERR(idxd_desc));
1405 return PTR_ERR(idxd_desc);
1406 }
1407 desc = idxd_desc->iax_hw;
1408
1409 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1410 desc->opcode = IAX_OPCODE_DECOMPRESS;
1411 desc->max_dst_size = PAGE_SIZE;
1412 desc->decompr_flags = IAA_DECOMP_FLAGS;
1413 desc->priv = 0;
1414
1415 desc->src1_addr = (u64)src_addr;
1416 desc->dst_addr = (u64)dst_addr;
1417 desc->max_dst_size = *dlen;
1418 desc->src1_size = slen;
1419 desc->completion_addr = idxd_desc->compl_dma;
1420
1421 if (ctx->use_irq && !disable_async) {
1422 desc->flags |= IDXD_OP_FLAG_RCI;
1423
1424 idxd_desc->crypto.req = req;
1425 idxd_desc->crypto.tfm = tfm;
1426 idxd_desc->crypto.src_addr = src_addr;
1427 idxd_desc->crypto.dst_addr = dst_addr;
1428 idxd_desc->crypto.compress = false;
1429
1430 dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1431 " src_addr %llx, dst_addr %llx\n", __func__,
1432 active_compression_mode->name,
1433 src_addr, dst_addr);
1434 } else if (ctx->async_mode && !disable_async)
1435 req->base.data = idxd_desc;
1436
1437 dev_dbg(dev, "%s: decompression mode %s,"
1438 " desc->src1_addr %llx, desc->src1_size %d,"
1439 " desc->dst_addr %llx, desc->max_dst_size %d,"
1440 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1441 active_compression_mode->name,
1442 desc->src1_addr, desc->src1_size, desc->dst_addr,
1443 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1444
1445 ret = idxd_submit_desc(wq, idxd_desc);
1446 if (ret) {
1447 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1448 goto err;
1449 }
1450
1451 /* Update stats */
1452 update_total_decomp_calls();
1453 update_wq_decomp_calls(wq);
1454
1455 if (ctx->async_mode && !disable_async) {
1456 ret = -EINPROGRESS;
1457 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1458 goto out;
1459 }
1460
1461 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1462 if (ret) {
1463 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1464 if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1465 pr_warn("%s: falling back to deflate-generic decompress, "
1466 "analytics error code %x\n", __func__,
1467 idxd_desc->iax_completion->error_code);
1468 ret = deflate_generic_decompress(req);
1469 if (ret) {
1470 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1471 __func__, ret);
1472 goto err;
1473 }
1474 } else {
1475 goto err;
1476 }
1477 } else {
1478 req->dlen = idxd_desc->iax_completion->output_size;
1479 }
1480
1481 *dlen = req->dlen;
1482
1483 if (!ctx->async_mode || disable_async)
1484 idxd_free_desc(wq, idxd_desc);
1485
1486 /* Update stats */
1487 update_total_decomp_bytes_in(slen);
1488 update_wq_decomp_bytes(wq, slen);
1489 out:
1490 return ret;
1491 err:
1492 idxd_free_desc(wq, idxd_desc);
1493 dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1494
1495 goto out;
1496 }
1497
1498 static int iaa_comp_acompress(struct acomp_req *req)
1499 {
1500 struct iaa_compression_ctx *compression_ctx;
1501 struct crypto_tfm *tfm = req->base.tfm;
1502 dma_addr_t src_addr, dst_addr;
1503 bool disable_async = false;
1504 int nr_sgs, cpu, ret = 0;
1505 struct iaa_wq *iaa_wq;
1506 u32 compression_crc;
1507 struct idxd_wq *wq;
1508 struct device *dev;
1509 int order = -1;
1510
1511 compression_ctx = crypto_tfm_ctx(tfm);
1512
1513 if (!iaa_crypto_enabled) {
1514 pr_debug("iaa_crypto disabled, not compressing\n");
1515 return -ENODEV;
1516 }
1517
1518 if (!req->src || !req->slen) {
1519 pr_debug("invalid src, not compressing\n");
1520 return -EINVAL;
1521 }
1522
1523 cpu = get_cpu();
1524 wq = wq_table_next_wq(cpu);
1525 put_cpu();
1526 if (!wq) {
1527 pr_debug("no wq configured for cpu=%d\n", cpu);
1528 return -ENODEV;
1529 }
1530
1531 ret = iaa_wq_get(wq);
1532 if (ret) {
1533 pr_debug("no wq available for cpu=%d\n", cpu);
1534 return -ENODEV;
1535 }
1536
1537 iaa_wq = idxd_wq_get_private(wq);
1538
1539 if (!req->dst) {
1540 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1541
1542 /* incompressible data will always be < 2 * slen */
1543 req->dlen = 2 * req->slen;
1544 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1545 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1546 if (!req->dst) {
1547 ret = -ENOMEM;
1548 order = -1;
1549 goto out;
1550 }
1551 disable_async = true;
1552 }
1553
1554 dev = &wq->idxd->pdev->dev;
1555
1556 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1557 if (nr_sgs <= 0 || nr_sgs > 1) {
1558 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1559 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1560 iaa_wq->wq->id, ret);
1561 ret = -EIO;
1562 goto out;
1563 }
1564 src_addr = sg_dma_address(req->src);
1565 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1566 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1567 req->src, req->slen, sg_dma_len(req->src));
1568
1569 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1570 if (nr_sgs <= 0 || nr_sgs > 1) {
1571 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1572 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1573 iaa_wq->wq->id, ret);
1574 ret = -EIO;
1575 goto err_map_dst;
1576 }
1577 dst_addr = sg_dma_address(req->dst);
1578 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1579 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1580 req->dst, req->dlen, sg_dma_len(req->dst));
1581
1582 ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1583 &req->dlen, &compression_crc, disable_async);
1584 if (ret == -EINPROGRESS)
1585 return ret;
1586
1587 if (!ret && compression_ctx->verify_compress) {
1588 ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1589 if (ret) {
1590 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1591 goto out;
1592 }
1593
1594 ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1595 dst_addr, &req->dlen, compression_crc);
1596 if (ret)
1597 dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1598
1599 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1600 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1601
1602 goto out;
1603 }
1604
1605 if (ret)
1606 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1607
1608 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1609 err_map_dst:
1610 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1611 out:
1612 iaa_wq_put(wq);
1613
1614 if (order >= 0)
1615 sgl_free_order(req->dst, order);
1616
1617 return ret;
1618 }
1619
1620 static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1621 {
1622 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1623 GFP_KERNEL : GFP_ATOMIC;
1624 struct crypto_tfm *tfm = req->base.tfm;
1625 dma_addr_t src_addr, dst_addr;
1626 int nr_sgs, cpu, ret = 0;
1627 struct iaa_wq *iaa_wq;
1628 struct device *dev;
1629 struct idxd_wq *wq;
1630 int order = -1;
1631
1632 cpu = get_cpu();
1633 wq = wq_table_next_wq(cpu);
1634 put_cpu();
1635 if (!wq) {
1636 pr_debug("no wq configured for cpu=%d\n", cpu);
1637 return -ENODEV;
1638 }
1639
1640 ret = iaa_wq_get(wq);
1641 if (ret) {
1642 pr_debug("no wq available for cpu=%d\n", cpu);
1643 return -ENODEV;
1644 }
1645
1646 iaa_wq = idxd_wq_get_private(wq);
1647
1648 dev = &wq->idxd->pdev->dev;
1649
1650 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1651 if (nr_sgs <= 0 || nr_sgs > 1) {
1652 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1653 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1654 iaa_wq->wq->id, ret);
1655 ret = -EIO;
1656 goto out;
1657 }
1658 src_addr = sg_dma_address(req->src);
1659 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1660 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1661 req->src, req->slen, sg_dma_len(req->src));
1662
1663 req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1664 alloc_dest:
1665 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1666 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1667 if (!req->dst) {
1668 ret = -ENOMEM;
1669 order = -1;
1670 goto out;
1671 }
1672
1673 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1674 if (nr_sgs <= 0 || nr_sgs > 1) {
1675 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1676 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1677 iaa_wq->wq->id, ret);
1678 ret = -EIO;
1679 goto err_map_dst;
1680 }
1681
1682 dst_addr = sg_dma_address(req->dst);
1683 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1684 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1685 req->dst, req->dlen, sg_dma_len(req->dst));
1686 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1687 dst_addr, &req->dlen, true);
1688 if (ret == -EOVERFLOW) {
1689 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1690 req->dlen *= 2;
1691 if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1692 goto err_map_dst;
1693 goto alloc_dest;
1694 }
1695
1696 if (ret != 0)
1697 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1698
1699 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1700 err_map_dst:
1701 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1702 out:
1703 iaa_wq_put(wq);
1704
1705 if (order >= 0)
1706 sgl_free_order(req->dst, order);
1707
1708 return ret;
1709 }
1710
1711 static int iaa_comp_adecompress(struct acomp_req *req)
1712 {
1713 struct crypto_tfm *tfm = req->base.tfm;
1714 dma_addr_t src_addr, dst_addr;
1715 int nr_sgs, cpu, ret = 0;
1716 struct iaa_wq *iaa_wq;
1717 struct device *dev;
1718 struct idxd_wq *wq;
1719
1720 if (!iaa_crypto_enabled) {
1721 pr_debug("iaa_crypto disabled, not decompressing\n");
1722 return -ENODEV;
1723 }
1724
1725 if (!req->src || !req->slen) {
1726 pr_debug("invalid src, not decompressing\n");
1727 return -EINVAL;
1728 }
1729
1730 if (!req->dst)
1731 return iaa_comp_adecompress_alloc_dest(req);
1732
1733 cpu = get_cpu();
1734 wq = wq_table_next_wq(cpu);
1735 put_cpu();
1736 if (!wq) {
1737 pr_debug("no wq configured for cpu=%d\n", cpu);
1738 return -ENODEV;
1739 }
1740
1741 ret = iaa_wq_get(wq);
1742 if (ret) {
1743 pr_debug("no wq available for cpu=%d\n", cpu);
1744 return -ENODEV;
1745 }
1746
1747 iaa_wq = idxd_wq_get_private(wq);
1748
1749 dev = &wq->idxd->pdev->dev;
1750
1751 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1752 if (nr_sgs <= 0 || nr_sgs > 1) {
1753 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1754 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1755 iaa_wq->wq->id, ret);
1756 ret = -EIO;
1757 goto out;
1758 }
1759 src_addr = sg_dma_address(req->src);
1760 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1761 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1762 req->src, req->slen, sg_dma_len(req->src));
1763
1764 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1765 if (nr_sgs <= 0 || nr_sgs > 1) {
1766 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1767 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1768 iaa_wq->wq->id, ret);
1769 ret = -EIO;
1770 goto err_map_dst;
1771 }
1772 dst_addr = sg_dma_address(req->dst);
1773 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775 req->dst, req->dlen, sg_dma_len(req->dst));
1776
1777 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1778 dst_addr, &req->dlen, false);
1779 if (ret == -EINPROGRESS)
1780 return ret;
1781
1782 if (ret != 0)
1783 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1784
1785 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1786 err_map_dst:
1787 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1788 out:
1789 iaa_wq_put(wq);
1790
1791 return ret;
1792 }
1793
1794 static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1795 {
1796 ctx->verify_compress = iaa_verify_compress;
1797 ctx->async_mode = async_mode;
1798 ctx->use_irq = use_irq;
1799 }
1800
1801 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1802 {
1803 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1804 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1805
1806 compression_ctx_init(ctx);
1807
1808 ctx->mode = IAA_MODE_FIXED;
1809
1810 return 0;
1811 }
1812
1813 static void dst_free(struct scatterlist *sgl)
1814 {
1815 /*
1816 * Called for req->dst = NULL cases but we free elsewhere
1817 * using sgl_free_order().
1818 */
1819 }
1820
1821 static struct acomp_alg iaa_acomp_fixed_deflate = {
1822 .init = iaa_comp_init_fixed,
1823 .compress = iaa_comp_acompress,
1824 .decompress = iaa_comp_adecompress,
1825 .dst_free = dst_free,
1826 .base = {
1827 .cra_name = "deflate",
1828 .cra_driver_name = "deflate-iaa",
1829 .cra_flags = CRYPTO_ALG_ASYNC,
1830 .cra_ctxsize = sizeof(struct iaa_compression_ctx),
1831 .cra_module = THIS_MODULE,
1832 .cra_priority = IAA_ALG_PRIORITY,
1833 }
1834 };
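/*
 * Consumer sketch: once registered, this algorithm is reachable through
 * the normal acomp API, e.g. crypto_alloc_acomp("deflate", 0, 0) selects
 * it when its priority wins, or it can be requested by driver name
 * ("deflate-iaa"). Facilities such as zswap can typically pick it with
 *
 *	echo deflate-iaa > /sys/module/zswap/parameters/compressor
 */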
1835
1836 static int iaa_register_compression_device(void)
1837 {
1838 int ret;
1839
1840 ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1841 if (ret) {
1842 pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1843 goto out;
1844 }
1845
1846 iaa_crypto_registered = true;
1847 out:
1848 return ret;
1849 }
1850
1851 static int iaa_unregister_compression_device(void)
1852 {
1853 if (iaa_crypto_registered)
1854 crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1855
1856 return 0;
1857 }
1858
1859 static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1860 {
1861 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1862 struct idxd_device *idxd = wq->idxd;
1863 struct idxd_driver_data *data = idxd->data;
1864 struct device *dev = &idxd_dev->conf_dev;
1865 bool first_wq = false;
1866 int ret = 0;
1867
1868 if (idxd->state != IDXD_DEV_ENABLED)
1869 return -ENXIO;
1870
1871 if (data->type != IDXD_TYPE_IAX)
1872 return -ENODEV;
1873
1874 mutex_lock(&wq->wq_lock);
1875
1876 if (idxd_wq_get_private(wq)) {
1877 mutex_unlock(&wq->wq_lock);
1878 return -EBUSY;
1879 }
1880
1881 if (!idxd_wq_driver_name_match(wq, dev)) {
1882 dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1883 idxd->id, wq->id, wq->driver_name, dev->driver->name);
1884 idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1885 ret = -ENODEV;
1886 goto err;
1887 }
1888
1889 wq->type = IDXD_WQT_KERNEL;
1890
1891 ret = idxd_drv_enable_wq(wq);
1892 if (ret < 0) {
1893 dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1894 idxd->id, wq->id, ret);
1895 ret = -ENXIO;
1896 goto err;
1897 }
1898
1899 mutex_lock(&iaa_devices_lock);
1900
1901 if (list_empty(&iaa_devices)) {
1902 ret = alloc_wq_table(wq->idxd->max_wqs);
1903 if (ret)
1904 goto err_alloc;
1905 first_wq = true;
1906 }
1907
1908 ret = save_iaa_wq(wq);
1909 if (ret)
1910 goto err_save;
1911
1912 rebalance_wq_table();
1913
1914 if (first_wq) {
1915 iaa_crypto_enabled = true;
1916 ret = iaa_register_compression_device();
1917 if (ret != 0) {
1918 iaa_crypto_enabled = false;
1919 dev_dbg(dev, "IAA compression device registration failed\n");
1920 goto err_register;
1921 }
1922 try_module_get(THIS_MODULE);
1923
1924 pr_info("iaa_crypto now ENABLED\n");
1925 }
1926
1927 mutex_unlock(&iaa_devices_lock);
1928 out:
1929 mutex_unlock(&wq->wq_lock);
1930
1931 return ret;
1932
1933 err_register:
1934 remove_iaa_wq(wq);
1935 free_iaa_wq(idxd_wq_get_private(wq));
1936 err_save:
1937 if (first_wq)
1938 free_wq_table();
1939 err_alloc:
1940 mutex_unlock(&iaa_devices_lock);
1941 idxd_drv_disable_wq(wq);
1942 err:
1943 wq->type = IDXD_WQT_NONE;
1944
1945 goto out;
1946 }
1947
1948 static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
1949 {
1950 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1951 struct idxd_device *idxd = wq->idxd;
1952 struct iaa_wq *iaa_wq;
1953 bool free = false;
1954
1955 idxd_wq_quiesce(wq);
1956
1957 mutex_lock(&wq->wq_lock);
1958 mutex_lock(&iaa_devices_lock);
1959
1960 remove_iaa_wq(wq);
1961
1962 spin_lock(&idxd->dev_lock);
1963 iaa_wq = idxd_wq_get_private(wq);
1964 if (!iaa_wq) {
1965 spin_unlock(&idxd->dev_lock);
1966 pr_err("%s: no iaa_wq available to remove\n", __func__);
1967 goto out;
1968 }
1969
1970 if (iaa_wq->ref) {
1971 iaa_wq->remove = true;
1972 } else {
1973 wq = iaa_wq->wq;
1974 idxd_wq_set_private(wq, NULL);
1975 free = true;
1976 }
1977 spin_unlock(&idxd->dev_lock);
1978 if (free) {
1979 __free_iaa_wq(iaa_wq);
1980 kfree(iaa_wq);
1981 }
1982
1983 idxd_drv_disable_wq(wq);
1984 rebalance_wq_table();
1985
1986 if (nr_iaa == 0) {
1987 iaa_crypto_enabled = false;
1988 free_wq_table();
1989 module_put(THIS_MODULE);
1990
1991 pr_info("iaa_crypto now DISABLED\n");
1992 }
1993 out:
1994 mutex_unlock(&iaa_devices_lock);
1995 mutex_unlock(&wq->wq_lock);
1996 }
1997
1998 static enum idxd_dev_type dev_types[] = {
1999 IDXD_DEV_WQ,
2000 IDXD_DEV_NONE,
2001 };
2002
2003 static struct idxd_device_driver iaa_crypto_driver = {
2004 .probe = iaa_crypto_probe,
2005 .remove = iaa_crypto_remove,
2006 .name = IDXD_SUBDRIVER_NAME,
2007 .type = dev_types,
2008 .desc_complete = iaa_desc_complete,
2009 };
2010
2011 static int __init iaa_crypto_init_module(void)
2012 {
2013 int ret = 0;
2014 int node;
2015
2016 nr_cpus = num_possible_cpus();
2017 for_each_node_with_cpus(node)
2018 nr_nodes++;
2019 if (!nr_nodes) {
2020 pr_err("IAA couldn't find any nodes with cpus\n");
2021 return -ENODEV;
2022 }
2023 nr_cpus_per_node = nr_cpus / nr_nodes;
2024
2025 if (crypto_has_comp("deflate-generic", 0, 0))
2026 deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2027
2028 if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2029 pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2030 "deflate-generic", PTR_ERR(deflate_generic_tfm));
2031 return -ENOMEM;
2032 }
2033
2034 ret = iaa_aecs_init_fixed();
2035 if (ret < 0) {
2036 pr_debug("IAA fixed compression mode init failed\n");
2037 goto err_aecs_init;
2038 }
2039
2040 ret = idxd_driver_register(&iaa_crypto_driver);
2041 if (ret) {
2042 pr_debug("IAA wq sub-driver registration failed\n");
2043 goto err_driver_reg;
2044 }
2045
2046 ret = driver_create_file(&iaa_crypto_driver.drv,
2047 &driver_attr_verify_compress);
2048 if (ret) {
2049 pr_debug("IAA verify_compress attr creation failed\n");
2050 goto err_verify_attr_create;
2051 }
2052
2053 ret = driver_create_file(&iaa_crypto_driver.drv,
2054 &driver_attr_sync_mode);
2055 if (ret) {
2056 pr_debug("IAA sync mode attr creation failed\n");
2057 goto err_sync_attr_create;
2058 }
2059
2060 if (iaa_crypto_debugfs_init())
2061 pr_warn("debugfs init failed, stats not available\n");
2062
2063 pr_debug("initialized\n");
2064 out:
2065 return ret;
2066
2067 err_sync_attr_create:
2068 driver_remove_file(&iaa_crypto_driver.drv,
2069 &driver_attr_verify_compress);
2070 err_verify_attr_create:
2071 idxd_driver_unregister(&iaa_crypto_driver);
2072 err_driver_reg:
2073 iaa_aecs_cleanup_fixed();
2074 err_aecs_init:
2075 crypto_free_comp(deflate_generic_tfm);
2076
2077 goto out;
2078 }
2079
2080 static void __exit iaa_crypto_cleanup_module(void)
2081 {
2082 if (iaa_unregister_compression_device())
2083 pr_debug("IAA compression device unregister failed\n");
2084
2085 iaa_crypto_debugfs_cleanup();
2086 driver_remove_file(&iaa_crypto_driver.drv,
2087 &driver_attr_sync_mode);
2088 driver_remove_file(&iaa_crypto_driver.drv,
2089 &driver_attr_verify_compress);
2090 idxd_driver_unregister(&iaa_crypto_driver);
2091 iaa_aecs_cleanup_fixed();
2092 crypto_free_comp(deflate_generic_tfm);
2093
2094 pr_debug("cleaned up\n");
2095 }
2096
2097 MODULE_IMPORT_NS("IDXD");
2098 MODULE_LICENSE("GPL");
2099 MODULE_ALIAS_IDXD_DEVICE(0);
2100 MODULE_AUTHOR("Intel Corporation");
2101 MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2102
2103 module_init(iaa_crypto_init_module);
2104 module_exit(iaa_crypto_cleanup_module);
2105