xref: /linux/drivers/crypto/intel/iaa/iaa_crypto_main.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3 
4 #include <linux/init.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/device.h>
9 #include <linux/iommu.h>
10 #include <uapi/linux/idxd.h>
11 #include <linux/highmem.h>
12 #include <linux/sched/smt.h>
13 #include <crypto/internal/acompress.h>
14 
15 #include "idxd.h"
16 #include "iaa_crypto.h"
17 #include "iaa_crypto_stats.h"
18 
19 #ifdef pr_fmt
20 #undef pr_fmt
21 #endif
22 
23 #define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24 
25 #define IAA_ALG_PRIORITY               300
26 
27 /* number of iaa instances probed */
28 static unsigned int nr_iaa;
29 static unsigned int nr_cpus;
30 static unsigned int nr_nodes;
31 static unsigned int nr_cpus_per_node;
32 
33 /* Number of physical cpus sharing each iaa instance */
34 static unsigned int cpus_per_iaa;
35 
36 static struct crypto_comp *deflate_generic_tfm;
37 
38 /* Per-cpu lookup table for balanced wqs */
39 static struct wq_table_entry __percpu *wq_table;
40 
41 static struct idxd_wq *wq_table_next_wq(int cpu)
42 {
43 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44 
45 	if (++entry->cur_wq >= entry->n_wqs)
46 		entry->cur_wq = 0;
47 
48 	if (!entry->wqs[entry->cur_wq])
49 		return NULL;
50 
51 	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 		 entry->wqs[entry->cur_wq]->id, cpu);
54 
55 	return entry->wqs[entry->cur_wq];
56 }
57 
58 static void wq_table_add(int cpu, struct idxd_wq *wq)
59 {
60 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61 
62 	if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 		return;
64 
65 	entry->wqs[entry->n_wqs++] = wq;
66 
67 	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 		 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70 }
71 
72 static void wq_table_free_entry(int cpu)
73 {
74 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75 
76 	kfree(entry->wqs);
77 	memset(entry, 0, sizeof(*entry));
78 }
79 
80 static void wq_table_clear_entry(int cpu)
81 {
82 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83 
84 	entry->n_wqs = 0;
85 	entry->cur_wq = 0;
86 	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87 }
88 
89 LIST_HEAD(iaa_devices);
90 DEFINE_MUTEX(iaa_devices_lock);
91 
92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93 static bool iaa_crypto_enabled;
94 static bool iaa_crypto_registered;
95 
96 /* Verify results of IAA compress or not */
97 static bool iaa_verify_compress = true;
98 
99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100 {
101 	return sprintf(buf, "%d\n", iaa_verify_compress);
102 }
103 
104 static ssize_t verify_compress_store(struct device_driver *driver,
105 				     const char *buf, size_t count)
106 {
107 	int ret = -EBUSY;
108 
109 	mutex_lock(&iaa_devices_lock);
110 
111 	if (iaa_crypto_enabled)
112 		goto out;
113 
114 	ret = kstrtobool(buf, &iaa_verify_compress);
115 	if (ret)
116 		goto out;
117 
118 	ret = count;
119 out:
120 	mutex_unlock(&iaa_devices_lock);
121 
122 	return ret;
123 }
124 static DRIVER_ATTR_RW(verify_compress);
125 
126 /*
127  * The iaa crypto driver supports three 'sync' methods determining how
128  * compressions and decompressions are performed:
129  *
130  * - sync:      the compression or decompression completes before
131  *              returning.  This is the mode used by the async crypto
132  *              interface when the sync mode is set to 'sync' and by
133  *              the sync crypto interface regardless of setting.
134  *
135  * - async:     the compression or decompression is submitted and returns
136  *              immediately.  Completion interrupts are not used so
137  *              the caller is responsible for polling the descriptor
138  *              for completion.  This mode is applicable to only the
139  *              async crypto interface and is ignored for anything
140  *              else.
141  *
142  * - async_irq: the compression or decompression is submitted and
143  *              returns immediately.  Completion interrupts are
144  *              enabled so the caller can wait for the completion and
145  *              yield to other threads.  When the compression or
146  *              decompression completes, the completion is signaled
147  *              and the caller awakened.  This mode is applicable to
148  *              only the async crypto interface and is ignored for
149  *              anything else.
150  *
151  * These modes can be set using the iaa_crypto sync_mode driver
152  * attribute.
153  */
154 
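/*
 * Example (illustrative only - the sysfs paths below assume the
 * standard idxd 'crypto' sub-driver location under /sys/bus/dsa and
 * may differ on a given system):
 *
 *	# switch to interrupt-driven async operation
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *
 *	# skip the compress-verify step (see iaa_verify_compress above)
 *	echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *
 * Both attributes can only be changed while iaa_crypto is disabled
 * (no wqs bound to the driver); the _store handlers below return
 * -EBUSY otherwise.
 */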
155 /* Use async mode */
156 static bool async_mode;
157 /* Use interrupts */
158 static bool use_irq;
159 
160 /**
161  * set_iaa_sync_mode - Set IAA sync mode
162  * @name: The name of the sync mode
163  *
164  * Make the IAA sync mode named @name the current sync mode used by
165  * compression/decompression.
166  */
167 
168 static int set_iaa_sync_mode(const char *name)
169 {
170 	int ret = 0;
171 
172 	if (sysfs_streq(name, "sync")) {
173 		async_mode = false;
174 		use_irq = false;
175 	} else if (sysfs_streq(name, "async")) {
176 		async_mode = true;
177 		use_irq = false;
178 	} else if (sysfs_streq(name, "async_irq")) {
179 		async_mode = true;
180 		use_irq = true;
181 	} else {
182 		ret = -EINVAL;
183 	}
184 
185 	return ret;
186 }
187 
188 static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189 {
190 	int ret = 0;
191 
192 	if (!async_mode && !use_irq)
193 		ret = sprintf(buf, "%s\n", "sync");
194 	else if (async_mode && !use_irq)
195 		ret = sprintf(buf, "%s\n", "async");
196 	else if (async_mode && use_irq)
197 		ret = sprintf(buf, "%s\n", "async_irq");
198 
199 	return ret;
200 }
201 
202 static ssize_t sync_mode_store(struct device_driver *driver,
203 			       const char *buf, size_t count)
204 {
205 	int ret = -EBUSY;
206 
207 	mutex_lock(&iaa_devices_lock);
208 
209 	if (iaa_crypto_enabled)
210 		goto out;
211 
212 	ret = set_iaa_sync_mode(buf);
213 	if (ret == 0)
214 		ret = count;
215 out:
216 	mutex_unlock(&iaa_devices_lock);
217 
218 	return ret;
219 }
220 static DRIVER_ATTR_RW(sync_mode);
221 
222 static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223 
224 static int find_empty_iaa_compression_mode(void)
225 {
226 	int i = -EINVAL;
227 
228 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 		if (iaa_compression_modes[i])
230 			continue;
231 		break;
232 	}
233 
234 	return i;
235 }
236 
237 static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238 {
239 	struct iaa_compression_mode *mode;
240 	int i;
241 
242 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 		mode = iaa_compression_modes[i];
244 		if (!mode)
245 			continue;
246 
247 		if (!strcmp(mode->name, name)) {
248 			*idx = i;
249 			return iaa_compression_modes[i];
250 		}
251 	}
252 
253 	return NULL;
254 }
255 
256 static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257 {
258 	kfree(mode->name);
259 	kfree(mode->ll_table);
260 	kfree(mode->d_table);
261 
262 	kfree(mode);
263 }
264 
265 /*
266  * IAA Compression modes are defined by an ll_table and a d_table.
267  * These tables are typically generated and captured using statistics
268  * collected from running actual compress/decompress workloads.
269  *
270  * A module or other kernel code can add and remove compression modes
271  * with a given name using the exported @add_iaa_compression_mode()
272  * and @remove_iaa_compression_mode() functions.
273  *
274  * When a new compression mode is added, the tables are saved in a
275  * global compression mode list.  When IAA devices are added, a
276  * per-IAA device dma mapping is created for each IAA device, for each
277  * compression mode.  These are the tables used to do the actual
278  * compression/decompression and are unmapped if/when the devices are
279  * removed.  Currently, compression modes must be added before any
280  * device is added, and removed after all devices have been removed.
281  */
282 
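/*
 * Minimal usage sketch (hypothetical mode name and placeholder tables,
 * shown only to illustrate the calling convention described above - a
 * real mode would use Huffman tables generated from workload stats):
 *
 *	static const u32 my_ll_table[] = { 0 };    placeholder codes
 *	static const u32 my_d_table[] = { 0 };     placeholder codes
 *
 *	ret = add_iaa_compression_mode("my-mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, NULL);
 *	...
 *	remove_iaa_compression_mode("my-mode");
 *
 * Both calls must be made while no IAA devices are bound to the
 * driver: add_iaa_compression_mode() returns -EBUSY and
 * remove_iaa_compression_mode() silently does nothing otherwise.
 */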
283 /**
284  * remove_iaa_compression_mode - Remove an IAA compression mode
285  * @name: The name of the compression mode to remove
286  *
287  * Remove the IAA compression mode named @name.
288  */
289 void remove_iaa_compression_mode(const char *name)
290 {
291 	struct iaa_compression_mode *mode;
292 	int idx;
293 
294 	mutex_lock(&iaa_devices_lock);
295 
296 	if (!list_empty(&iaa_devices))
297 		goto out;
298 
299 	mode = find_iaa_compression_mode(name, &idx);
300 	if (mode) {
301 		free_iaa_compression_mode(mode);
302 		iaa_compression_modes[idx] = NULL;
303 	}
304 out:
305 	mutex_unlock(&iaa_devices_lock);
306 }
307 EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
308 
309 /**
310  * add_iaa_compression_mode - Add an IAA compression mode
311  * @name: The name the compression mode will be known as
312  * @ll_table: The ll table
313  * @ll_table_size: The ll table size in bytes
314  * @d_table: The d table
315  * @d_table_size: The d table size in bytes
316  * @init: Optional callback function to init the compression mode data
317  * @free: Optional callback function to free the compression mode data
318  *
319  * Add a new IAA compression mode named @name.
320  *
321  * Returns 0 if successful, errcode otherwise.
322  */
323 int add_iaa_compression_mode(const char *name,
324 			     const u32 *ll_table,
325 			     int ll_table_size,
326 			     const u32 *d_table,
327 			     int d_table_size,
328 			     iaa_dev_comp_init_fn_t init,
329 			     iaa_dev_comp_free_fn_t free)
330 {
331 	struct iaa_compression_mode *mode;
332 	int idx, ret = -ENOMEM;
333 
334 	mutex_lock(&iaa_devices_lock);
335 
336 	if (!list_empty(&iaa_devices)) {
337 		ret = -EBUSY;
338 		goto out;
339 	}
340 
341 	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
342 	if (!mode)
343 		goto out;
344 
345 	mode->name = kstrdup(name, GFP_KERNEL);
346 	if (!mode->name)
347 		goto free;
348 
349 	if (ll_table) {
350 		mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
351 		if (!mode->ll_table)
352 			goto free;
353 		mode->ll_table_size = ll_table_size;
354 	}
355 
356 	if (d_table) {
357 		mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
358 		if (!mode->d_table)
359 			goto free;
360 		mode->d_table_size = d_table_size;
361 	}
362 
363 	mode->init = init;
364 	mode->free = free;
365 
366 	idx = find_empty_iaa_compression_mode();
367 	if (idx < 0)
368 		goto free;
369 
370 	pr_debug("IAA compression mode %s added at idx %d\n",
371 		 mode->name, idx);
372 
373 	iaa_compression_modes[idx] = mode;
374 
375 	ret = 0;
376 out:
377 	mutex_unlock(&iaa_devices_lock);
378 
379 	return ret;
380 free:
381 	free_iaa_compression_mode(mode);
382 	goto out;
383 }
384 EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
385 
386 static struct iaa_device_compression_mode *
387 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
388 {
389 	return iaa_device->compression_modes[idx];
390 }
391 
392 static void free_device_compression_mode(struct iaa_device *iaa_device,
393 					 struct iaa_device_compression_mode *device_mode)
394 {
395 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
396 	struct device *dev = &iaa_device->idxd->pdev->dev;
397 
398 	kfree(device_mode->name);
399 
400 	if (device_mode->aecs_comp_table)
401 		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
402 				  device_mode->aecs_comp_table_dma_addr);
403 	kfree(device_mode);
404 }
405 
406 #define IDXD_OP_FLAG_AECS_RW_TGLS       0x400000
407 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
408 #define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
409 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
410 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
411 						IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
412 						IDXD_OP_FLAG_AECS_RW_TGLS)
413 
414 static int check_completion(struct device *dev,
415 			    struct iax_completion_record *comp,
416 			    bool compress,
417 			    bool only_once);
418 
419 static int init_device_compression_mode(struct iaa_device *iaa_device,
420 					struct iaa_compression_mode *mode,
421 					int idx, struct idxd_wq *wq)
422 {
423 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
424 	struct device *dev = &iaa_device->idxd->pdev->dev;
425 	struct iaa_device_compression_mode *device_mode;
426 	int ret = -ENOMEM;
427 
428 	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
429 	if (!device_mode)
430 		return -ENOMEM;
431 
432 	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
433 	if (!device_mode->name)
434 		goto free;
435 
436 	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
437 							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
438 	if (!device_mode->aecs_comp_table)
439 		goto free;
440 
441 	/* Add Huffman table to aecs */
442 	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
443 	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
444 	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
445 
446 	if (mode->init) {
447 		ret = mode->init(device_mode);
448 		if (ret)
449 			goto free;
450 	}
451 
452 	/* mode index should match iaa_compression_modes idx */
453 	iaa_device->compression_modes[idx] = device_mode;
454 
455 	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
456 		 mode->name, iaa_device->idxd->id);
457 
458 	ret = 0;
459 out:
460 	return ret;
461 free:
462 	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
463 		 mode->name, iaa_device->idxd->id);
464 
465 	free_device_compression_mode(iaa_device, device_mode);
466 	goto out;
467 }
468 
469 static int init_device_compression_modes(struct iaa_device *iaa_device,
470 					 struct idxd_wq *wq)
471 {
472 	struct iaa_compression_mode *mode;
473 	int i, ret = 0;
474 
475 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
476 		mode = iaa_compression_modes[i];
477 		if (!mode)
478 			continue;
479 
480 		ret = init_device_compression_mode(iaa_device, mode, i, wq);
481 		if (ret)
482 			break;
483 	}
484 
485 	return ret;
486 }
487 
488 static void remove_device_compression_modes(struct iaa_device *iaa_device)
489 {
490 	struct iaa_device_compression_mode *device_mode;
491 	int i;
492 
493 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
494 		device_mode = iaa_device->compression_modes[i];
495 		if (!device_mode)
496 			continue;
497 
498 		if (iaa_compression_modes[i]->free)
499 			iaa_compression_modes[i]->free(device_mode);
500 		free_device_compression_mode(iaa_device, device_mode);
501 		iaa_device->compression_modes[i] = NULL;
502 	}
503 }
504 
505 static struct iaa_device *iaa_device_alloc(void)
506 {
507 	struct iaa_device *iaa_device;
508 
509 	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
510 	if (!iaa_device)
511 		return NULL;
512 
513 	INIT_LIST_HEAD(&iaa_device->wqs);
514 
515 	return iaa_device;
516 }
517 
518 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
519 {
520 	struct iaa_wq *iaa_wq;
521 
522 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
523 		if (iaa_wq->wq == wq)
524 			return true;
525 	}
526 
527 	return false;
528 }
529 
530 static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
531 {
532 	struct iaa_device *iaa_device;
533 
534 	iaa_device = iaa_device_alloc();
535 	if (!iaa_device)
536 		return NULL;
537 
538 	iaa_device->idxd = idxd;
539 
540 	list_add_tail(&iaa_device->list, &iaa_devices);
541 
542 	nr_iaa++;
543 
544 	return iaa_device;
545 }
546 
547 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
548 {
549 	int ret = 0;
550 
551 	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
552 	if (ret)
553 		return ret;
554 
555 	return ret;
556 }
557 
558 static void del_iaa_device(struct iaa_device *iaa_device)
559 {
560 	list_del(&iaa_device->list);
561 
562 	nr_iaa--;
563 }
564 
565 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
566 		      struct iaa_wq **new_wq)
567 {
568 	struct idxd_device *idxd = iaa_device->idxd;
569 	struct pci_dev *pdev = idxd->pdev;
570 	struct device *dev = &pdev->dev;
571 	struct iaa_wq *iaa_wq;
572 
573 	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
574 	if (!iaa_wq)
575 		return -ENOMEM;
576 
577 	iaa_wq->wq = wq;
578 	iaa_wq->iaa_device = iaa_device;
579 	idxd_wq_set_private(wq, iaa_wq);
580 
581 	list_add_tail(&iaa_wq->list, &iaa_device->wqs);
582 
583 	iaa_device->n_wq++;
584 
585 	if (new_wq)
586 		*new_wq = iaa_wq;
587 
588 	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
589 		wq->id, iaa_device->idxd->id, iaa_device->n_wq);
590 
591 	return 0;
592 }
593 
594 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
595 {
596 	struct idxd_device *idxd = iaa_device->idxd;
597 	struct pci_dev *pdev = idxd->pdev;
598 	struct device *dev = &pdev->dev;
599 	struct iaa_wq *iaa_wq;
600 
601 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
602 		if (iaa_wq->wq == wq) {
603 			list_del(&iaa_wq->list);
604 			iaa_device->n_wq--;
605 
606 			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
607 				wq->id, iaa_device->idxd->id,
608 				iaa_device->n_wq, nr_iaa);
609 
610 			if (iaa_device->n_wq == 0)
611 				del_iaa_device(iaa_device);
612 			break;
613 		}
614 	}
615 }
616 
617 static void clear_wq_table(void)
618 {
619 	int cpu;
620 
621 	for (cpu = 0; cpu < nr_cpus; cpu++)
622 		wq_table_clear_entry(cpu);
623 
624 	pr_debug("cleared wq table\n");
625 }
626 
627 static void free_iaa_device(struct iaa_device *iaa_device)
628 {
629 	if (!iaa_device)
630 		return;
631 
632 	remove_device_compression_modes(iaa_device);
633 	kfree(iaa_device);
634 }
635 
636 static void __free_iaa_wq(struct iaa_wq *iaa_wq)
637 {
638 	struct iaa_device *iaa_device;
639 
640 	if (!iaa_wq)
641 		return;
642 
643 	iaa_device = iaa_wq->iaa_device;
644 	if (iaa_device->n_wq == 0)
645 		free_iaa_device(iaa_wq->iaa_device);
646 }
647 
648 static void free_iaa_wq(struct iaa_wq *iaa_wq)
649 {
650 	struct idxd_wq *wq;
651 
652 	__free_iaa_wq(iaa_wq);
653 
654 	wq = iaa_wq->wq;
655 
656 	kfree(iaa_wq);
657 	idxd_wq_set_private(wq, NULL);
658 }
659 
660 static int iaa_wq_get(struct idxd_wq *wq)
661 {
662 	struct idxd_device *idxd = wq->idxd;
663 	struct iaa_wq *iaa_wq;
664 	int ret = 0;
665 
666 	spin_lock(&idxd->dev_lock);
667 	iaa_wq = idxd_wq_get_private(wq);
668 	if (iaa_wq && !iaa_wq->remove) {
669 		iaa_wq->ref++;
670 		idxd_wq_get(wq);
671 	} else {
672 		ret = -ENODEV;
673 	}
674 	spin_unlock(&idxd->dev_lock);
675 
676 	return ret;
677 }
678 
679 static int iaa_wq_put(struct idxd_wq *wq)
680 {
681 	struct idxd_device *idxd = wq->idxd;
682 	struct iaa_wq *iaa_wq;
683 	bool free = false;
684 	int ret = 0;
685 
686 	spin_lock(&idxd->dev_lock);
687 	iaa_wq = idxd_wq_get_private(wq);
688 	if (iaa_wq) {
689 		iaa_wq->ref--;
690 		if (iaa_wq->ref == 0 && iaa_wq->remove) {
691 			idxd_wq_set_private(wq, NULL);
692 			free = true;
693 		}
694 		idxd_wq_put(wq);
695 	} else {
696 		ret = -ENODEV;
697 	}
698 	spin_unlock(&idxd->dev_lock);
699 	if (free) {
700 		__free_iaa_wq(iaa_wq);
701 		kfree(iaa_wq);
702 	}
703 
704 	return ret;
705 }
706 
707 static void free_wq_table(void)
708 {
709 	int cpu;
710 
711 	for (cpu = 0; cpu < nr_cpus; cpu++)
712 		wq_table_free_entry(cpu);
713 
714 	free_percpu(wq_table);
715 
716 	pr_debug("freed wq table\n");
717 }
718 
719 static int alloc_wq_table(int max_wqs)
720 {
721 	struct wq_table_entry *entry;
722 	int cpu;
723 
724 	wq_table = alloc_percpu(struct wq_table_entry);
725 	if (!wq_table)
726 		return -ENOMEM;
727 
728 	for (cpu = 0; cpu < nr_cpus; cpu++) {
729 		entry = per_cpu_ptr(wq_table, cpu);
730 		entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
731 		if (!entry->wqs) {
732 			free_wq_table();
733 			return -ENOMEM;
734 		}
735 
736 		entry->max_wqs = max_wqs;
737 	}
738 
739 	pr_debug("initialized wq table\n");
740 
741 	return 0;
742 }
743 
744 static int save_iaa_wq(struct idxd_wq *wq)
745 {
746 	struct iaa_device *iaa_device, *found = NULL;
747 	struct idxd_device *idxd;
748 	struct pci_dev *pdev;
749 	struct device *dev;
750 	int ret = 0;
751 
752 	list_for_each_entry(iaa_device, &iaa_devices, list) {
753 		if (iaa_device->idxd == wq->idxd) {
754 			idxd = iaa_device->idxd;
755 			pdev = idxd->pdev;
756 			dev = &pdev->dev;
757 			/*
758 			 * Check to see that we don't already have this wq.
759 			 * Shouldn't happen but we don't control probing.
760 			 */
761 			if (iaa_has_wq(iaa_device, wq)) {
762 				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
763 					iaa_device);
764 				goto out;
765 			}
766 
767 			found = iaa_device;
768 
769 			ret = add_iaa_wq(iaa_device, wq, NULL);
770 			if (ret)
771 				goto out;
772 
773 			break;
774 		}
775 	}
776 
777 	if (!found) {
778 		struct iaa_device *new_device;
779 		struct iaa_wq *new_wq;
780 
781 		new_device = add_iaa_device(wq->idxd);
782 		if (!new_device) {
783 			ret = -ENOMEM;
784 			goto out;
785 		}
786 
787 		ret = add_iaa_wq(new_device, wq, &new_wq);
788 		if (ret) {
789 			del_iaa_device(new_device);
790 			free_iaa_device(new_device);
791 			goto out;
792 		}
793 
794 		ret = init_iaa_device(new_device, new_wq);
795 		if (ret) {
796 			del_iaa_wq(new_device, new_wq->wq);
797 			del_iaa_device(new_device);
798 			free_iaa_wq(new_wq);
799 			goto out;
800 		}
801 	}
802 
803 	if (WARN_ON(nr_iaa == 0))
804 		return -EINVAL;
805 
806 	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
807 	if (!cpus_per_iaa)
808 		cpus_per_iaa = 1;
809 out:
810 	return 0;
811 }
812 
813 static void remove_iaa_wq(struct idxd_wq *wq)
814 {
815 	struct iaa_device *iaa_device;
816 
817 	list_for_each_entry(iaa_device, &iaa_devices, list) {
818 		if (iaa_has_wq(iaa_device, wq)) {
819 			del_iaa_wq(iaa_device, wq);
820 			break;
821 		}
822 	}
823 
824 	if (nr_iaa) {
825 		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
826 		if (!cpus_per_iaa)
827 			cpus_per_iaa = 1;
828 	} else
829 		cpus_per_iaa = 1;
830 }
831 
832 static int wq_table_add_wqs(int iaa, int cpu)
833 {
834 	struct iaa_device *iaa_device, *found_device = NULL;
835 	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
836 	struct idxd_device *idxd;
837 	struct iaa_wq *iaa_wq;
838 	struct pci_dev *pdev;
839 	struct device *dev;
840 
841 	list_for_each_entry(iaa_device, &iaa_devices, list) {
842 		idxd = iaa_device->idxd;
843 		pdev = idxd->pdev;
844 		dev = &pdev->dev;
845 
846 		if (cur_iaa != iaa) {
847 			cur_iaa++;
848 			continue;
849 		}
850 
851 		found_device = iaa_device;
852 		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
853 			found_device->idxd->id, cur_iaa);
854 		break;
855 	}
856 
857 	if (!found_device) {
858 		found_device = list_first_entry_or_null(&iaa_devices,
859 							struct iaa_device, list);
860 		if (!found_device) {
861 			pr_debug("couldn't find any iaa devices with wqs!\n");
862 			ret = -EINVAL;
863 			goto out;
864 		}
865 		cur_iaa = 0;
866 
867 		idxd = found_device->idxd;
868 		pdev = idxd->pdev;
869 		dev = &pdev->dev;
870 		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
871 			found_device->idxd->id, cur_iaa);
872 	}
873 
874 	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
875 		wq_table_add(cpu, iaa_wq->wq);
876 		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
877 			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
878 		n_wqs_added++;
879 	}
880 
881 	if (!n_wqs_added) {
882 		pr_debug("couldn't find any iaa wqs!\n");
883 		ret = -EINVAL;
884 		goto out;
885 	}
886 out:
887 	return ret;
888 }
889 
890 /*
891  * Rebalance the wq table so that given a cpu, it's easy to find the
892  * closest IAA instance.  The idea is to try to choose the most
893  * appropriate IAA instance for a caller and spread available
894  * workqueues around to clients.
895  */
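/*
 * Worked example (hypothetical topology): with 2 nodes, 64 cpus per
 * node and 4 IAA instances, cpus_per_iaa = (2 * 64) / 4 = 32.  The
 * node loop below then assigns node 0's first 32 cpus to IAA 0 and
 * its next 32 to IAA 1, node 1's first 32 cpus to IAA 2 and its last
 * 32 to IAA 3.  Each cpu's per-cpu wq_table entry holds all wqs of
 * its assigned IAA device, and wq_table_next_wq() round-robins among
 * them at request time.
 */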
896 static void rebalance_wq_table(void)
897 {
898 	const struct cpumask *node_cpus;
899 	int node, cpu, iaa = -1;
900 
901 	if (nr_iaa == 0)
902 		return;
903 
904 	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
905 		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
906 
907 	clear_wq_table();
908 
909 	if (nr_iaa == 1) {
910 		for (cpu = 0; cpu < nr_cpus; cpu++) {
911 			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
912 				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
913 				return;
914 			}
915 		}
916 
917 		return;
918 	}
919 
920 	for_each_node_with_cpus(node) {
921 		node_cpus = cpumask_of_node(node);
922 
923 		for (cpu = 0; cpu <  cpumask_weight(node_cpus); cpu++) {
924 			int node_cpu = cpumask_nth(cpu, node_cpus);
925 
926 			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
927 				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
928 				return;
929 			}
930 
931 			if ((cpu % cpus_per_iaa) == 0)
932 				iaa++;
933 
934 			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
935 				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
936 				return;
937 			}
938 		}
939 	}
940 }
941 
942 static inline int check_completion(struct device *dev,
943 				   struct iax_completion_record *comp,
944 				   bool compress,
945 				   bool only_once)
946 {
947 	char *op_str = compress ? "compress" : "decompress";
948 	int status_checks = 0;
949 	int ret = 0;
950 
951 	while (!comp->status) {
952 		if (only_once)
953 			return -EAGAIN;
954 		cpu_relax();
955 		if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
956 			/* Something is wrong with the hw, disable it. */
957 			dev_err(dev, "%s completion timed out - "
958 				"assuming broken hw, iaa_crypto now DISABLED\n",
959 				op_str);
960 			iaa_crypto_enabled = false;
961 			ret = -ETIMEDOUT;
962 			goto out;
963 		}
964 	}
965 
966 	if (comp->status != IAX_COMP_SUCCESS) {
967 		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
968 			ret = -ETIMEDOUT;
969 			dev_dbg(dev, "%s timed out, size=0x%x\n",
970 				op_str, comp->output_size);
971 			update_completion_timeout_errs();
972 			goto out;
973 		}
974 
975 		if (comp->status == IAA_ANALYTICS_ERROR &&
976 		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
977 			ret = -E2BIG;
978 			dev_dbg(dev, "compressed > uncompressed size,"
979 				" not compressing, size=0x%x\n",
980 				comp->output_size);
981 			update_completion_comp_buf_overflow_errs();
982 			goto out;
983 		}
984 
985 		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
986 			ret = -EOVERFLOW;
987 			goto out;
988 		}
989 
990 		ret = -EINVAL;
991 		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
992 			op_str, comp->status, comp->error_code, comp->output_size);
993 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
994 		update_completion_einval_errs();
995 
996 		goto out;
997 	}
998 out:
999 	return ret;
1000 }
1001 
1002 static int deflate_generic_decompress(struct acomp_req *req)
1003 {
1004 	void *src, *dst;
1005 	int ret;
1006 
1007 	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1008 	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1009 
1010 	ret = crypto_comp_decompress(deflate_generic_tfm,
1011 				     src, req->slen, dst, &req->dlen);
1012 
1013 	kunmap_local(src);
1014 	kunmap_local(dst);
1015 
1016 	update_total_sw_decomp_calls();
1017 
1018 	return ret;
1019 }
1020 
1021 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1022 				struct acomp_req *req,
1023 				dma_addr_t *src_addr, dma_addr_t *dst_addr);
1024 
1025 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1026 			       struct idxd_wq *wq,
1027 			       dma_addr_t src_addr, unsigned int slen,
1028 			       dma_addr_t dst_addr, unsigned int *dlen,
1029 			       u32 compression_crc);
1030 
1031 static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1032 			      enum idxd_complete_type comp_type,
1033 			      bool free_desc, void *__ctx,
1034 			      u32 *status)
1035 {
1036 	struct iaa_device_compression_mode *active_compression_mode;
1037 	struct iaa_compression_ctx *compression_ctx;
1038 	struct crypto_ctx *ctx = __ctx;
1039 	struct iaa_device *iaa_device;
1040 	struct idxd_device *idxd;
1041 	struct iaa_wq *iaa_wq;
1042 	struct pci_dev *pdev;
1043 	struct device *dev;
1044 	int ret, err = 0;
1045 
1046 	compression_ctx = crypto_tfm_ctx(ctx->tfm);
1047 
1048 	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1049 	iaa_device = iaa_wq->iaa_device;
1050 	idxd = iaa_device->idxd;
1051 	pdev = idxd->pdev;
1052 	dev = &pdev->dev;
1053 
1054 	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1055 								  compression_ctx->mode);
1056 	dev_dbg(dev, "%s: compression mode %s,"
1057 		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1058 		active_compression_mode->name,
1059 		ctx->src_addr, ctx->dst_addr);
1060 
1061 	ret = check_completion(dev, idxd_desc->iax_completion,
1062 			       ctx->compress, false);
1063 	if (ret) {
1064 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1065 		if (!ctx->compress &&
1066 		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1067 			pr_warn("%s: falling back to deflate-generic decompress, "
1068 				"analytics error code %x\n", __func__,
1069 				idxd_desc->iax_completion->error_code);
1070 			ret = deflate_generic_decompress(ctx->req);
1071 			if (ret) {
1072 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1073 					__func__, ret);
1074 				err = -EIO;
1075 				goto err;
1076 			}
1077 		} else {
1078 			err = -EIO;
1079 			goto err;
1080 		}
1081 	} else {
1082 		ctx->req->dlen = idxd_desc->iax_completion->output_size;
1083 	}
1084 
1085 	/* Update stats */
1086 	if (ctx->compress) {
1087 		update_total_comp_bytes_out(ctx->req->dlen);
1088 		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1089 	} else {
1090 		update_total_decomp_bytes_in(ctx->req->slen);
1091 		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
1092 	}
1093 
1094 	if (ctx->compress && compression_ctx->verify_compress) {
1095 		dma_addr_t src_addr, dst_addr;
1096 		u32 compression_crc;
1097 
1098 		compression_crc = idxd_desc->iax_completion->crc;
1099 
1100 		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1101 		if (ret) {
1102 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1103 			err = -EIO;
1104 			goto out;
1105 		}
1106 
1107 		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1108 					  ctx->req->slen, dst_addr, &ctx->req->dlen,
1109 					  compression_crc);
1110 		if (ret) {
1111 			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1112 			err = -EIO;
1113 		}
1114 
1115 		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1116 		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1117 
1118 		goto out;
1119 	}
1120 err:
1121 	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1122 	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1123 out:
1124 	if (ret != 0)
1125 		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1126 
1127 	if (ctx->req->base.complete)
1128 		acomp_request_complete(ctx->req, err);
1129 
1130 	if (free_desc)
1131 		idxd_free_desc(idxd_desc->wq, idxd_desc);
1132 	iaa_wq_put(idxd_desc->wq);
1133 }
1134 
1135 static int iaa_compress(struct crypto_tfm *tfm,	struct acomp_req *req,
1136 			struct idxd_wq *wq,
1137 			dma_addr_t src_addr, unsigned int slen,
1138 			dma_addr_t dst_addr, unsigned int *dlen,
1139 			u32 *compression_crc,
1140 			bool disable_async)
1141 {
1142 	struct iaa_device_compression_mode *active_compression_mode;
1143 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1144 	struct iaa_device *iaa_device;
1145 	struct idxd_desc *idxd_desc;
1146 	struct iax_hw_desc *desc;
1147 	struct idxd_device *idxd;
1148 	struct iaa_wq *iaa_wq;
1149 	struct pci_dev *pdev;
1150 	struct device *dev;
1151 	int ret = 0;
1152 
1153 	iaa_wq = idxd_wq_get_private(wq);
1154 	iaa_device = iaa_wq->iaa_device;
1155 	idxd = iaa_device->idxd;
1156 	pdev = idxd->pdev;
1157 	dev = &pdev->dev;
1158 
1159 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1160 
1161 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1162 	if (IS_ERR(idxd_desc)) {
1163 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1164 		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1165 		return PTR_ERR(idxd_desc);
1166 	}
1167 	desc = idxd_desc->iax_hw;
1168 
1169 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1170 		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1171 	desc->opcode = IAX_OPCODE_COMPRESS;
1172 	desc->compr_flags = IAA_COMP_FLAGS;
1173 	desc->priv = 0;
1174 
1175 	desc->src1_addr = (u64)src_addr;
1176 	desc->src1_size = slen;
1177 	desc->dst_addr = (u64)dst_addr;
1178 	desc->max_dst_size = *dlen;
1179 	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1180 	desc->src2_size = sizeof(struct aecs_comp_table_record);
1181 	desc->completion_addr = idxd_desc->compl_dma;
1182 
1183 	if (ctx->use_irq && !disable_async) {
1184 		desc->flags |= IDXD_OP_FLAG_RCI;
1185 
1186 		idxd_desc->crypto.req = req;
1187 		idxd_desc->crypto.tfm = tfm;
1188 		idxd_desc->crypto.src_addr = src_addr;
1189 		idxd_desc->crypto.dst_addr = dst_addr;
1190 		idxd_desc->crypto.compress = true;
1191 
1192 		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1193 			" src_addr %llx, dst_addr %llx\n", __func__,
1194 			active_compression_mode->name,
1195 			src_addr, dst_addr);
1196 	} else if (ctx->async_mode && !disable_async)
1197 		req->base.data = idxd_desc;
1198 
1199 	dev_dbg(dev, "%s: compression mode %s,"
1200 		" desc->src1_addr %llx, desc->src1_size %d,"
1201 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1202 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1203 		active_compression_mode->name,
1204 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1205 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1206 
1207 	ret = idxd_submit_desc(wq, idxd_desc);
1208 	if (ret) {
1209 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1210 		goto err;
1211 	}
1212 
1213 	/* Update stats */
1214 	update_total_comp_calls();
1215 	update_wq_comp_calls(wq);
1216 
1217 	if (ctx->async_mode && !disable_async) {
1218 		ret = -EINPROGRESS;
1219 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1220 		goto out;
1221 	}
1222 
1223 	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1224 	if (ret) {
1225 		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1226 		goto err;
1227 	}
1228 
1229 	*dlen = idxd_desc->iax_completion->output_size;
1230 
1231 	/* Update stats */
1232 	update_total_comp_bytes_out(*dlen);
1233 	update_wq_comp_bytes(wq, *dlen);
1234 
1235 	*compression_crc = idxd_desc->iax_completion->crc;
1236 
1237 	if (!ctx->async_mode || disable_async)
1238 		idxd_free_desc(wq, idxd_desc);
1239 out:
1240 	return ret;
1241 err:
1242 	idxd_free_desc(wq, idxd_desc);
1243 	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1244 
1245 	goto out;
1246 }
1247 
1248 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1249 				struct acomp_req *req,
1250 				dma_addr_t *src_addr, dma_addr_t *dst_addr)
1251 {
1252 	int ret = 0;
1253 	int nr_sgs;
1254 
1255 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1256 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1257 
1258 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1259 	if (nr_sgs <= 0 || nr_sgs > 1) {
1260 		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1261 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1262 			iaa_wq->wq->id, ret);
1263 		ret = -EIO;
1264 		goto out;
1265 	}
1266 	*src_addr = sg_dma_address(req->src);
1267 	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1268 		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1269 		req->src, req->slen, sg_dma_len(req->src));
1270 
1271 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1272 	if (nr_sgs <= 0 || nr_sgs > 1) {
1273 		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1274 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1275 			iaa_wq->wq->id, ret);
1276 		ret = -EIO;
1277 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1278 		goto out;
1279 	}
1280 	*dst_addr = sg_dma_address(req->dst);
1281 	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1282 		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1283 		req->dst, req->dlen, sg_dma_len(req->dst));
1284 out:
1285 	return ret;
1286 }
1287 
1288 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1289 			       struct idxd_wq *wq,
1290 			       dma_addr_t src_addr, unsigned int slen,
1291 			       dma_addr_t dst_addr, unsigned int *dlen,
1292 			       u32 compression_crc)
1293 {
1294 	struct iaa_device_compression_mode *active_compression_mode;
1295 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1296 	struct iaa_device *iaa_device;
1297 	struct idxd_desc *idxd_desc;
1298 	struct iax_hw_desc *desc;
1299 	struct idxd_device *idxd;
1300 	struct iaa_wq *iaa_wq;
1301 	struct pci_dev *pdev;
1302 	struct device *dev;
1303 	int ret = 0;
1304 
1305 	iaa_wq = idxd_wq_get_private(wq);
1306 	iaa_device = iaa_wq->iaa_device;
1307 	idxd = iaa_device->idxd;
1308 	pdev = idxd->pdev;
1309 	dev = &pdev->dev;
1310 
1311 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1312 
1313 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1314 	if (IS_ERR(idxd_desc)) {
1315 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1316 		dev_dbg(dev, "iaa compress verify failed: ret=%ld\n",
1317 			PTR_ERR(idxd_desc));
1318 		return PTR_ERR(idxd_desc);
1319 	}
1320 	desc = idxd_desc->iax_hw;
1321 
1322 	/* Verify (optional) - decompress and check crc, suppress dest write */
1323 
1324 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1325 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1326 	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1327 	desc->priv = 0;
1328 
1329 	desc->src1_addr = (u64)dst_addr;
1330 	desc->src1_size = *dlen;
1331 	desc->dst_addr = (u64)src_addr;
1332 	desc->max_dst_size = slen;
1333 	desc->completion_addr = idxd_desc->compl_dma;
1334 
1335 	dev_dbg(dev, "(verify) compression mode %s,"
1336 		" desc->src1_addr %llx, desc->src1_size %d,"
1337 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1338 		" desc->src2_addr %llx, desc->src2_size %d\n",
1339 		active_compression_mode->name,
1340 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1341 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1342 
1343 	ret = idxd_submit_desc(wq, idxd_desc);
1344 	if (ret) {
1345 		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1346 		goto err;
1347 	}
1348 
1349 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1350 	if (ret) {
1351 		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1352 		goto err;
1353 	}
1354 
1355 	if (compression_crc != idxd_desc->iax_completion->crc) {
1356 		ret = -EINVAL;
1357 		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1358 			" comp=0x%x, decomp=0x%x\n", compression_crc,
1359 			idxd_desc->iax_completion->crc);
1360 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1361 			       8, 1, idxd_desc->iax_completion, 64, 0);
1362 		goto err;
1363 	}
1364 
1365 	idxd_free_desc(wq, idxd_desc);
1366 out:
1367 	return ret;
1368 err:
1369 	idxd_free_desc(wq, idxd_desc);
1370 	dev_dbg(dev, "iaa compress verify failed: ret=%d\n", ret);
1371 
1372 	goto out;
1373 }
1374 
1375 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1376 			  struct idxd_wq *wq,
1377 			  dma_addr_t src_addr, unsigned int slen,
1378 			  dma_addr_t dst_addr, unsigned int *dlen,
1379 			  bool disable_async)
1380 {
1381 	struct iaa_device_compression_mode *active_compression_mode;
1382 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1383 	struct iaa_device *iaa_device;
1384 	struct idxd_desc *idxd_desc;
1385 	struct iax_hw_desc *desc;
1386 	struct idxd_device *idxd;
1387 	struct iaa_wq *iaa_wq;
1388 	struct pci_dev *pdev;
1389 	struct device *dev;
1390 	int ret = 0;
1391 
1392 	iaa_wq = idxd_wq_get_private(wq);
1393 	iaa_device = iaa_wq->iaa_device;
1394 	idxd = iaa_device->idxd;
1395 	pdev = idxd->pdev;
1396 	dev = &pdev->dev;
1397 
1398 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1399 
1400 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1401 	if (IS_ERR(idxd_desc)) {
1402 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1403 		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1404 			PTR_ERR(idxd_desc));
1405 		return PTR_ERR(idxd_desc);
1406 	}
1407 	desc = idxd_desc->iax_hw;
1408 
1409 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1410 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1411 	desc->max_dst_size = PAGE_SIZE;
1412 	desc->decompr_flags = IAA_DECOMP_FLAGS;
1413 	desc->priv = 0;
1414 
1415 	desc->src1_addr = (u64)src_addr;
1416 	desc->dst_addr = (u64)dst_addr;
1417 	desc->max_dst_size = *dlen;
1418 	desc->src1_size = slen;
1419 	desc->completion_addr = idxd_desc->compl_dma;
1420 
1421 	if (ctx->use_irq && !disable_async) {
1422 		desc->flags |= IDXD_OP_FLAG_RCI;
1423 
1424 		idxd_desc->crypto.req = req;
1425 		idxd_desc->crypto.tfm = tfm;
1426 		idxd_desc->crypto.src_addr = src_addr;
1427 		idxd_desc->crypto.dst_addr = dst_addr;
1428 		idxd_desc->crypto.compress = false;
1429 
1430 		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1431 			" src_addr %llx, dst_addr %llx\n", __func__,
1432 			active_compression_mode->name,
1433 			src_addr, dst_addr);
1434 	} else if (ctx->async_mode && !disable_async)
1435 		req->base.data = idxd_desc;
1436 
1437 	dev_dbg(dev, "%s: decompression mode %s,"
1438 		" desc->src1_addr %llx, desc->src1_size %d,"
1439 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1440 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1441 		active_compression_mode->name,
1442 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1443 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1444 
1445 	ret = idxd_submit_desc(wq, idxd_desc);
1446 	if (ret) {
1447 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1448 		goto err;
1449 	}
1450 
1451 	/* Update stats */
1452 	update_total_decomp_calls();
1453 	update_wq_decomp_calls(wq);
1454 
1455 	if (ctx->async_mode && !disable_async) {
1456 		ret = -EINPROGRESS;
1457 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1458 		goto out;
1459 	}
1460 
1461 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1462 	if (ret) {
1463 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1464 		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1465 			pr_warn("%s: falling back to deflate-generic decompress, "
1466 				"analytics error code %x\n", __func__,
1467 				idxd_desc->iax_completion->error_code);
1468 			ret = deflate_generic_decompress(req);
1469 			if (ret) {
1470 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1471 					__func__, ret);
1472 				goto err;
1473 			}
1474 		} else {
1475 			goto err;
1476 		}
1477 	} else {
1478 		req->dlen = idxd_desc->iax_completion->output_size;
1479 	}
1480 
1481 	*dlen = req->dlen;
1482 
1483 	if (!ctx->async_mode || disable_async)
1484 		idxd_free_desc(wq, idxd_desc);
1485 
1486 	/* Update stats */
1487 	update_total_decomp_bytes_in(slen);
1488 	update_wq_decomp_bytes(wq, slen);
1489 out:
1490 	return ret;
1491 err:
1492 	idxd_free_desc(wq, idxd_desc);
1493 	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1494 
1495 	goto out;
1496 }
1497 
1498 static int iaa_comp_acompress(struct acomp_req *req)
1499 {
1500 	struct iaa_compression_ctx *compression_ctx;
1501 	struct crypto_tfm *tfm = req->base.tfm;
1502 	dma_addr_t src_addr, dst_addr;
1503 	bool disable_async = false;
1504 	int nr_sgs, cpu, ret = 0;
1505 	struct iaa_wq *iaa_wq;
1506 	u32 compression_crc;
1507 	struct idxd_wq *wq;
1508 	struct device *dev;
1509 	int order = -1;
1510 
1511 	compression_ctx = crypto_tfm_ctx(tfm);
1512 
1513 	if (!iaa_crypto_enabled) {
1514 		pr_debug("iaa_crypto disabled, not compressing\n");
1515 		return -ENODEV;
1516 	}
1517 
1518 	if (!req->src || !req->slen) {
1519 		pr_debug("invalid src, not compressing\n");
1520 		return -EINVAL;
1521 	}
1522 
1523 	cpu = get_cpu();
1524 	wq = wq_table_next_wq(cpu);
1525 	put_cpu();
1526 	if (!wq) {
1527 		pr_debug("no wq configured for cpu=%d\n", cpu);
1528 		return -ENODEV;
1529 	}
1530 
1531 	ret = iaa_wq_get(wq);
1532 	if (ret) {
1533 		pr_debug("no wq available for cpu=%d\n", cpu);
1534 		return -ENODEV;
1535 	}
1536 
1537 	iaa_wq = idxd_wq_get_private(wq);
1538 
1539 	if (!req->dst) {
1540 		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1541 
1542 		/* output for incompressible data will always be < 2 * slen */
1543 		req->dlen = 2 * req->slen;
1544 		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1545 		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1546 		if (!req->dst) {
1547 			ret = -ENOMEM;
1548 			order = -1;
1549 			goto out;
1550 		}
1551 		disable_async = true;
1552 	}
1553 
1554 	dev = &wq->idxd->pdev->dev;
1555 
1556 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1557 	if (nr_sgs <= 0 || nr_sgs > 1) {
1558 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1559 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1560 			iaa_wq->wq->id, ret);
1561 		ret = -EIO;
1562 		goto out;
1563 	}
1564 	src_addr = sg_dma_address(req->src);
1565 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1566 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1567 		req->src, req->slen, sg_dma_len(req->src));
1568 
1569 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1570 	if (nr_sgs <= 0 || nr_sgs > 1) {
1571 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1572 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1573 			iaa_wq->wq->id, ret);
1574 		ret = -EIO;
1575 		goto err_map_dst;
1576 	}
1577 	dst_addr = sg_dma_address(req->dst);
1578 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1579 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1580 		req->dst, req->dlen, sg_dma_len(req->dst));
1581 
1582 	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1583 			   &req->dlen, &compression_crc, disable_async);
1584 	if (ret == -EINPROGRESS)
1585 		return ret;
1586 
1587 	if (!ret && compression_ctx->verify_compress) {
1588 		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1589 		if (ret) {
1590 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1591 			goto out;
1592 		}
1593 
1594 		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1595 					  dst_addr, &req->dlen, compression_crc);
1596 		if (ret)
1597 			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1598 
1599 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1600 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1601 
1602 		goto out;
1603 	}
1604 
1605 	if (ret)
1606 		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1607 
1608 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1609 err_map_dst:
1610 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1611 out:
1612 	iaa_wq_put(wq);
1613 
1614 	if (order >= 0)
1615 		sgl_free_order(req->dst, order);
1616 
1617 	return ret;
1618 }
1619 
1620 static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1621 {
1622 	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1623 		GFP_KERNEL : GFP_ATOMIC;
1624 	struct crypto_tfm *tfm = req->base.tfm;
1625 	dma_addr_t src_addr, dst_addr;
1626 	int nr_sgs, cpu, ret = 0;
1627 	struct iaa_wq *iaa_wq;
1628 	struct device *dev;
1629 	struct idxd_wq *wq;
1630 	int order = -1;
1631 
1632 	cpu = get_cpu();
1633 	wq = wq_table_next_wq(cpu);
1634 	put_cpu();
1635 	if (!wq) {
1636 		pr_debug("no wq configured for cpu=%d\n", cpu);
1637 		return -ENODEV;
1638 	}
1639 
1640 	ret = iaa_wq_get(wq);
1641 	if (ret) {
1642 		pr_debug("no wq available for cpu=%d\n", cpu);
1643 		return -ENODEV;
1644 	}
1645 
1646 	iaa_wq = idxd_wq_get_private(wq);
1647 
1648 	dev = &wq->idxd->pdev->dev;
1649 
1650 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1651 	if (nr_sgs <= 0 || nr_sgs > 1) {
1652 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1653 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1654 			iaa_wq->wq->id, ret);
1655 		ret = -EIO;
1656 		goto out;
1657 	}
1658 	src_addr = sg_dma_address(req->src);
1659 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1660 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1661 		req->src, req->slen, sg_dma_len(req->src));
1662 
1663 	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1664 alloc_dest:
1665 	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1666 	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1667 	if (!req->dst) {
1668 		ret = -ENOMEM;
1669 		order = -1;
1670 		goto out;
1671 	}
1672 
1673 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1674 	if (nr_sgs <= 0 || nr_sgs > 1) {
1675 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1676 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1677 			iaa_wq->wq->id, ret);
1678 		ret = -EIO;
1679 		goto err_map_dst;
1680 	}
1681 
1682 	dst_addr = sg_dma_address(req->dst);
1683 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1684 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1685 		req->dst, req->dlen, sg_dma_len(req->dst));
1686 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1687 			     dst_addr, &req->dlen, true);
1688 	if (ret == -EOVERFLOW) {
1689 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1690 		req->dlen *= 2;
1691 		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1692 			goto err_map_dst;
1693 		goto alloc_dest;
1694 	}
1695 
1696 	if (ret != 0)
1697 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1698 
1699 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1700 err_map_dst:
1701 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1702 out:
1703 	iaa_wq_put(wq);
1704 
1705 	if (order >= 0)
1706 		sgl_free_order(req->dst, order);
1707 
1708 	return ret;
1709 }
1710 
1711 static int iaa_comp_adecompress(struct acomp_req *req)
1712 {
1713 	struct crypto_tfm *tfm = req->base.tfm;
1714 	dma_addr_t src_addr, dst_addr;
1715 	int nr_sgs, cpu, ret = 0;
1716 	struct iaa_wq *iaa_wq;
1717 	struct device *dev;
1718 	struct idxd_wq *wq;
1719 
1720 	if (!iaa_crypto_enabled) {
1721 		pr_debug("iaa_crypto disabled, not decompressing\n");
1722 		return -ENODEV;
1723 	}
1724 
1725 	if (!req->src || !req->slen) {
1726 		pr_debug("invalid src, not decompressing\n");
1727 		return -EINVAL;
1728 	}
1729 
1730 	if (!req->dst)
1731 		return iaa_comp_adecompress_alloc_dest(req);
1732 
1733 	cpu = get_cpu();
1734 	wq = wq_table_next_wq(cpu);
1735 	put_cpu();
1736 	if (!wq) {
1737 		pr_debug("no wq configured for cpu=%d\n", cpu);
1738 		return -ENODEV;
1739 	}
1740 
1741 	ret = iaa_wq_get(wq);
1742 	if (ret) {
1743 		pr_debug("no wq available for cpu=%d\n", cpu);
1744 		return -ENODEV;
1745 	}
1746 
1747 	iaa_wq = idxd_wq_get_private(wq);
1748 
1749 	dev = &wq->idxd->pdev->dev;
1750 
1751 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1752 	if (nr_sgs <= 0 || nr_sgs > 1) {
1753 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1754 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1755 			iaa_wq->wq->id, ret);
1756 		ret = -EIO;
1757 		goto out;
1758 	}
1759 	src_addr = sg_dma_address(req->src);
1760 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1761 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1762 		req->src, req->slen, sg_dma_len(req->src));
1763 
1764 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1765 	if (nr_sgs <= 0 || nr_sgs > 1) {
1766 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1767 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1768 			iaa_wq->wq->id, ret);
1769 		ret = -EIO;
1770 		goto err_map_dst;
1771 	}
1772 	dst_addr = sg_dma_address(req->dst);
1773 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775 		req->dst, req->dlen, sg_dma_len(req->dst));
1776 
1777 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1778 			     dst_addr, &req->dlen, false);
1779 	if (ret == -EINPROGRESS)
1780 		return ret;
1781 
1782 	if (ret != 0)
1783 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1784 
1785 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1786 err_map_dst:
1787 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1788 out:
1789 	iaa_wq_put(wq);
1790 
1791 	return ret;
1792 }
1793 
1794 static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1795 {
1796 	ctx->verify_compress = iaa_verify_compress;
1797 	ctx->async_mode = async_mode;
1798 	ctx->use_irq = use_irq;
1799 }
1800 
1801 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1802 {
1803 	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1804 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1805 
1806 	compression_ctx_init(ctx);
1807 
1808 	ctx->mode = IAA_MODE_FIXED;
1809 
1810 	return 0;
1811 }
1812 
1813 static void dst_free(struct scatterlist *sgl)
1814 {
1815 	/*
1816 	 * Called for req->dst = NULL cases but we free elsewhere
1817 	 * using sgl_free_order().
1818 	 */
1819 }
1820 
1821 static struct acomp_alg iaa_acomp_fixed_deflate = {
1822 	.init			= iaa_comp_init_fixed,
1823 	.compress		= iaa_comp_acompress,
1824 	.decompress		= iaa_comp_adecompress,
1825 	.dst_free               = dst_free,
1826 	.base			= {
1827 		.cra_name		= "deflate",
1828 		.cra_driver_name	= "deflate-iaa",
1829 		.cra_flags		= CRYPTO_ALG_ASYNC,
1830 		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
1831 		.cra_module		= THIS_MODULE,
1832 		.cra_priority		= IAA_ALG_PRIORITY,
1833 	}
1834 };
1835 
1836 static int iaa_register_compression_device(void)
1837 {
1838 	int ret;
1839 
1840 	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1841 	if (ret) {
1842 		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1843 		goto out;
1844 	}
1845 
1846 	iaa_crypto_registered = true;
1847 out:
1848 	return ret;
1849 }
1850 
1851 static int iaa_unregister_compression_device(void)
1852 {
1853 	if (iaa_crypto_registered)
1854 		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1855 
1856 	return 0;
1857 }
1858 
1859 static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1860 {
1861 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1862 	struct idxd_device *idxd = wq->idxd;
1863 	struct idxd_driver_data *data = idxd->data;
1864 	struct device *dev = &idxd_dev->conf_dev;
1865 	bool first_wq = false;
1866 	int ret = 0;
1867 
1868 	if (idxd->state != IDXD_DEV_ENABLED)
1869 		return -ENXIO;
1870 
1871 	if (data->type != IDXD_TYPE_IAX)
1872 		return -ENODEV;
1873 
1874 	mutex_lock(&wq->wq_lock);
1875 
1876 	if (idxd_wq_get_private(wq)) {
1877 		mutex_unlock(&wq->wq_lock);
1878 		return -EBUSY;
1879 	}
1880 
1881 	if (!idxd_wq_driver_name_match(wq, dev)) {
1882 		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1883 			idxd->id, wq->id, wq->driver_name, dev->driver->name);
1884 		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1885 		ret = -ENODEV;
1886 		goto err;
1887 	}
1888 
1889 	wq->type = IDXD_WQT_KERNEL;
1890 
1891 	ret = idxd_drv_enable_wq(wq);
1892 	if (ret < 0) {
1893 		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1894 			idxd->id, wq->id, ret);
1895 		ret = -ENXIO;
1896 		goto err;
1897 	}
1898 
1899 	mutex_lock(&iaa_devices_lock);
1900 
1901 	if (list_empty(&iaa_devices)) {
1902 		ret = alloc_wq_table(wq->idxd->max_wqs);
1903 		if (ret)
1904 			goto err_alloc;
1905 		first_wq = true;
1906 	}
1907 
1908 	ret = save_iaa_wq(wq);
1909 	if (ret)
1910 		goto err_save;
1911 
1912 	rebalance_wq_table();
1913 
1914 	if (first_wq) {
1915 		iaa_crypto_enabled = true;
1916 		ret = iaa_register_compression_device();
1917 		if (ret != 0) {
1918 			iaa_crypto_enabled = false;
1919 			dev_dbg(dev, "IAA compression device registration failed\n");
1920 			goto err_register;
1921 		}
1922 		try_module_get(THIS_MODULE);
1923 
1924 		pr_info("iaa_crypto now ENABLED\n");
1925 	}
1926 
1927 	mutex_unlock(&iaa_devices_lock);
1928 out:
1929 	mutex_unlock(&wq->wq_lock);
1930 
1931 	return ret;
1932 
1933 err_register:
1934 	remove_iaa_wq(wq);
1935 	free_iaa_wq(idxd_wq_get_private(wq));
1936 err_save:
1937 	if (first_wq)
1938 		free_wq_table();
1939 err_alloc:
1940 	mutex_unlock(&iaa_devices_lock);
1941 	idxd_drv_disable_wq(wq);
1942 err:
1943 	wq->type = IDXD_WQT_NONE;
1944 
1945 	goto out;
1946 }
1947 
1948 static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
1949 {
1950 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1951 	struct idxd_device *idxd = wq->idxd;
1952 	struct iaa_wq *iaa_wq;
1953 	bool free = false;
1954 
1955 	idxd_wq_quiesce(wq);
1956 
1957 	mutex_lock(&wq->wq_lock);
1958 	mutex_lock(&iaa_devices_lock);
1959 
1960 	remove_iaa_wq(wq);
1961 
1962 	spin_lock(&idxd->dev_lock);
1963 	iaa_wq = idxd_wq_get_private(wq);
1964 	if (!iaa_wq) {
1965 		spin_unlock(&idxd->dev_lock);
1966 		pr_err("%s: no iaa_wq available to remove\n", __func__);
1967 		goto out;
1968 	}
1969 
1970 	if (iaa_wq->ref) {
1971 		iaa_wq->remove = true;
1972 	} else {
1973 		wq = iaa_wq->wq;
1974 		idxd_wq_set_private(wq, NULL);
1975 		free = true;
1976 	}
1977 	spin_unlock(&idxd->dev_lock);
1978 	if (free) {
1979 		__free_iaa_wq(iaa_wq);
1980 		kfree(iaa_wq);
1981 	}
1982 
1983 	idxd_drv_disable_wq(wq);
1984 	rebalance_wq_table();
1985 
1986 	if (nr_iaa == 0) {
1987 		iaa_crypto_enabled = false;
1988 		free_wq_table();
1989 		module_put(THIS_MODULE);
1990 
1991 		pr_info("iaa_crypto now DISABLED\n");
1992 	}
1993 out:
1994 	mutex_unlock(&iaa_devices_lock);
1995 	mutex_unlock(&wq->wq_lock);
1996 }
1997 
1998 static enum idxd_dev_type dev_types[] = {
1999 	IDXD_DEV_WQ,
2000 	IDXD_DEV_NONE,
2001 };
2002 
2003 static struct idxd_device_driver iaa_crypto_driver = {
2004 	.probe = iaa_crypto_probe,
2005 	.remove = iaa_crypto_remove,
2006 	.name = IDXD_SUBDRIVER_NAME,
2007 	.type = dev_types,
2008 	.desc_complete = iaa_desc_complete,
2009 };
2010 
2011 static int __init iaa_crypto_init_module(void)
2012 {
2013 	int ret = 0;
2014 	int node;
2015 
2016 	nr_cpus = num_possible_cpus();
2017 	for_each_node_with_cpus(node)
2018 		nr_nodes++;
2019 	if (!nr_nodes) {
2020 		pr_err("IAA couldn't find any nodes with cpus\n");
2021 		return -ENODEV;
2022 	}
2023 	nr_cpus_per_node = nr_cpus / nr_nodes;
2024 
2025 	if (crypto_has_comp("deflate-generic", 0, 0))
2026 		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2027 
2028 	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2029 		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2030 		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
2031 		return -ENOMEM;
2032 	}
2033 
2034 	ret = iaa_aecs_init_fixed();
2035 	if (ret < 0) {
2036 		pr_debug("IAA fixed compression mode init failed\n");
2037 		goto err_aecs_init;
2038 	}
2039 
2040 	ret = idxd_driver_register(&iaa_crypto_driver);
2041 	if (ret) {
2042 		pr_debug("IAA wq sub-driver registration failed\n");
2043 		goto err_driver_reg;
2044 	}
2045 
2046 	ret = driver_create_file(&iaa_crypto_driver.drv,
2047 				 &driver_attr_verify_compress);
2048 	if (ret) {
2049 		pr_debug("IAA verify_compress attr creation failed\n");
2050 		goto err_verify_attr_create;
2051 	}
2052 
2053 	ret = driver_create_file(&iaa_crypto_driver.drv,
2054 				 &driver_attr_sync_mode);
2055 	if (ret) {
2056 		pr_debug("IAA sync mode attr creation failed\n");
2057 		goto err_sync_attr_create;
2058 	}
2059 
2060 	if (iaa_crypto_debugfs_init())
2061 		pr_warn("debugfs init failed, stats not available\n");
2062 
2063 	pr_debug("initialized\n");
2064 out:
2065 	return ret;
2066 
2067 err_sync_attr_create:
2068 	driver_remove_file(&iaa_crypto_driver.drv,
2069 			   &driver_attr_verify_compress);
2070 err_verify_attr_create:
2071 	idxd_driver_unregister(&iaa_crypto_driver);
2072 err_driver_reg:
2073 	iaa_aecs_cleanup_fixed();
2074 err_aecs_init:
2075 	crypto_free_comp(deflate_generic_tfm);
2076 
2077 	goto out;
2078 }
2079 
2080 static void __exit iaa_crypto_cleanup_module(void)
2081 {
2082 	if (iaa_unregister_compression_device())
2083 		pr_debug("IAA compression device unregister failed\n");
2084 
2085 	iaa_crypto_debugfs_cleanup();
2086 	driver_remove_file(&iaa_crypto_driver.drv,
2087 			   &driver_attr_sync_mode);
2088 	driver_remove_file(&iaa_crypto_driver.drv,
2089 			   &driver_attr_verify_compress);
2090 	idxd_driver_unregister(&iaa_crypto_driver);
2091 	iaa_aecs_cleanup_fixed();
2092 	crypto_free_comp(deflate_generic_tfm);
2093 
2094 	pr_debug("cleaned up\n");
2095 }
2096 
2097 MODULE_IMPORT_NS(IDXD);
2098 MODULE_LICENSE("GPL");
2099 MODULE_ALIAS_IDXD_DEVICE(0);
2100 MODULE_AUTHOR("Intel Corporation");
2101 MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2102 
2103 module_init(iaa_crypto_init_module);
2104 module_exit(iaa_crypto_cleanup_module);
2105