xref: /linux/drivers/crypto/intel/iaa/iaa_crypto_main.c (revision 71dfa617ea9f18e4585fe78364217cd32b1fc382)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3 
4 #include <linux/init.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/device.h>
9 #include <linux/iommu.h>
10 #include <uapi/linux/idxd.h>
11 #include <linux/highmem.h>
12 #include <linux/sched/smt.h>
13 #include <crypto/internal/acompress.h>
14 
15 #include "idxd.h"
16 #include "iaa_crypto.h"
17 #include "iaa_crypto_stats.h"
18 
19 #ifdef pr_fmt
20 #undef pr_fmt
21 #endif
22 
23 #define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24 
25 #define IAA_ALG_PRIORITY               300
26 
27 /* number of iaa instances probed */
28 static unsigned int nr_iaa;
29 static unsigned int nr_cpus;
30 static unsigned int nr_nodes;
31 static unsigned int nr_cpus_per_node;
32 
33 /* Number of physical cpus sharing each iaa instance */
34 static unsigned int cpus_per_iaa;
35 
36 static struct crypto_comp *deflate_generic_tfm;
37 
38 /* Per-cpu lookup table for balanced wqs */
39 static struct wq_table_entry __percpu *wq_table;
40 
41 static struct idxd_wq *wq_table_next_wq(int cpu)
42 {
43 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44 
45 	if (++entry->cur_wq >= entry->n_wqs)
46 		entry->cur_wq = 0;
47 
48 	if (!entry->wqs[entry->cur_wq])
49 		return NULL;
50 
51 	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 		 entry->wqs[entry->cur_wq]->id, cpu);
54 
55 	return entry->wqs[entry->cur_wq];
56 }
57 
58 static void wq_table_add(int cpu, struct idxd_wq *wq)
59 {
60 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61 
62 	if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 		return;
64 
65 	entry->wqs[entry->n_wqs++] = wq;
66 
67 	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 		 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70 }
71 
72 static void wq_table_free_entry(int cpu)
73 {
74 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75 
76 	kfree(entry->wqs);
77 	memset(entry, 0, sizeof(*entry));
78 }
79 
80 static void wq_table_clear_entry(int cpu)
81 {
82 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83 
84 	entry->n_wqs = 0;
85 	entry->cur_wq = 0;
86 	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87 }
88 
89 LIST_HEAD(iaa_devices);
90 DEFINE_MUTEX(iaa_devices_lock);
91 
92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93 static bool iaa_crypto_enabled;
94 static bool iaa_crypto_registered;
95 
96 /* Whether to verify the results of IAA compress operations (default: on) */
97 static bool iaa_verify_compress = true;
98 
99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100 {
101 	return sprintf(buf, "%d\n", iaa_verify_compress);
102 }
103 
104 static ssize_t verify_compress_store(struct device_driver *driver,
105 				     const char *buf, size_t count)
106 {
107 	int ret = -EBUSY;
108 
109 	mutex_lock(&iaa_devices_lock);
110 
111 	if (iaa_crypto_enabled)
112 		goto out;
113 
114 	ret = kstrtobool(buf, &iaa_verify_compress);
115 	if (ret)
116 		goto out;
117 
118 	ret = count;
119 out:
120 	mutex_unlock(&iaa_devices_lock);
121 
122 	return ret;
123 }
124 static DRIVER_ATTR_RW(verify_compress);
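
/*
 * Usage sketch (illustrative; the sysfs path assumes the idxd 'dsa'
 * bus and this sub-driver's "crypto" name - double-check the layout
 * on your system):
 *
 *	echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *
 * disables verification of compress results.  The store callback
 * above rejects writes with -EBUSY once iaa_crypto is enabled.
 */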
125 
126 /*
127  * The iaa crypto driver supports three 'sync' methods determining how
128  * compressions and decompressions are performed:
129  *
130  * - sync:      the compression or decompression completes before
131  *              returning.  This is the mode used by the async crypto
132  *              interface when the sync mode is set to 'sync' and by
133  *              the sync crypto interface regardless of setting.
134  *
135  * - async:     the compression or decompression is submitted and returns
136  *              immediately.  Completion interrupts are not used so
137  *              the caller is responsible for polling the descriptor
138  *              for completion.  This mode is applicable to only the
139  *              async crypto interface and is ignored for anything
140  *              else.
141  *
142  * - async_irq: the compression or decompression is submitted and
143  *              returns immediately.  Completion interrupts are
144  *              enabled so the caller can wait for the completion and
145  *              yield to other threads.  When the compression or
146  *              decompression completes, the completion is signaled
147  *              and the caller awakened.  This mode is applicable to
148  *              only the async crypto interface and is ignored for
149  *              anything else.
150  *
151  * These modes can be set using the iaa_crypto sync_mode driver
152  * attribute.
153  */
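
/*
 * Usage sketch (illustrative; same sysfs path assumption as the
 * verify_compress example above):
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *
 * selects interrupt-driven completions.  As with verify_compress,
 * sync_mode_store() below returns -EBUSY if iaa_crypto is already
 * enabled.
 */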
154 
155 /* Use async mode */
156 static bool async_mode;
157 /* Use interrupts */
158 static bool use_irq;
159 
160 /**
161  * set_iaa_sync_mode - Set IAA sync mode
162  * @name: The name of the sync mode
163  *
164  * Make the IAA sync mode named @name the current sync mode used by
165  * compression/decompression.
166  */
167 
168 static int set_iaa_sync_mode(const char *name)
169 {
170 	int ret = 0;
171 
172 	if (sysfs_streq(name, "sync")) {
173 		async_mode = false;
174 		use_irq = false;
175 	} else if (sysfs_streq(name, "async")) {
176 		async_mode = true;
177 		use_irq = false;
178 	} else if (sysfs_streq(name, "async_irq")) {
179 		async_mode = true;
180 		use_irq = true;
181 	} else {
182 		ret = -EINVAL;
183 	}
184 
185 	return ret;
186 }
187 
188 static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189 {
190 	int ret = 0;
191 
192 	if (!async_mode && !use_irq)
193 		ret = sprintf(buf, "%s\n", "sync");
194 	else if (async_mode && !use_irq)
195 		ret = sprintf(buf, "%s\n", "async");
196 	else if (async_mode && use_irq)
197 		ret = sprintf(buf, "%s\n", "async_irq");
198 
199 	return ret;
200 }
201 
202 static ssize_t sync_mode_store(struct device_driver *driver,
203 			       const char *buf, size_t count)
204 {
205 	int ret = -EBUSY;
206 
207 	mutex_lock(&iaa_devices_lock);
208 
209 	if (iaa_crypto_enabled)
210 		goto out;
211 
212 	ret = set_iaa_sync_mode(buf);
213 	if (ret == 0)
214 		ret = count;
215 out:
216 	mutex_unlock(&iaa_devices_lock);
217 
218 	return ret;
219 }
220 static DRIVER_ATTR_RW(sync_mode);
221 
222 static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223 
224 static int find_empty_iaa_compression_mode(void)
225 {
226 	int i;
227 
228 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 		if (!iaa_compression_modes[i])
230 			return i;
231 	}
232 
233 	/* no free slot; don't hand back an out-of-range index */
234 	return -EINVAL;
235 }
236 
237 static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238 {
239 	struct iaa_compression_mode *mode;
240 	int i;
241 
242 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 		mode = iaa_compression_modes[i];
244 		if (!mode)
245 			continue;
246 
247 		if (!strcmp(mode->name, name)) {
248 			*idx = i;
249 			return iaa_compression_modes[i];
250 		}
251 	}
252 
253 	return NULL;
254 }
255 
256 static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257 {
258 	kfree(mode->name);
259 	kfree(mode->ll_table);
260 	kfree(mode->d_table);
261 
262 	kfree(mode);
263 }
264 
265 /*
266  * IAA Compression modes are defined by an ll_table and a d_table.
267  * These tables are typically generated and captured using statistics
268  * collected from running actual compress/decompress workloads.
269  *
270  * A module or other kernel code can add and remove compression modes
271  * with a given name using the exported @add_iaa_compression_mode()
272  * and @remove_iaa_compression_mode() functions.
273  *
274  * When a new compression mode is added, the tables are saved in a
275  * global compression mode list.  When IAA devices are added, a
276  * per-IAA device dma mapping is created for each IAA device, for each
277  * compression mode.  These are the tables used to do the actual
278  * compression/decompression and are unmapped if/when the devices are
279  * removed.  Currently, compression modes must be added before any
280  * device is added, and removed after all devices have been removed.
281  */
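
/*
 * Example (hypothetical module code - the table contents below are
 * made-up placeholders, not real statistics; only the function
 * signature reflects the actual API):
 *
 *	static const u32 my_ll_table[] = { 0x0, 0x0, 0x0, 0x0 };
 *	static const u32 my_d_table[] = { 0x0, 0x0 };
 *
 *	ret = add_iaa_compression_mode("my-mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, NULL);
 *
 * and, after all IAA devices have been removed:
 *
 *	remove_iaa_compression_mode("my-mode");
 */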
282 
283 /**
284  * remove_iaa_compression_mode - Remove an IAA compression mode
285  * @name: The name the compression mode will be known as
286  *
287  * Remove the IAA compression mode named @name.
288  */
289 void remove_iaa_compression_mode(const char *name)
290 {
291 	struct iaa_compression_mode *mode;
292 	int idx;
293 
294 	mutex_lock(&iaa_devices_lock);
295 
296 	if (!list_empty(&iaa_devices))
297 		goto out;
298 
299 	mode = find_iaa_compression_mode(name, &idx);
300 	if (mode) {
301 		free_iaa_compression_mode(mode);
302 		iaa_compression_modes[idx] = NULL;
303 	}
304 out:
305 	mutex_unlock(&iaa_devices_lock);
306 }
307 EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
308 
309 /**
310  * add_iaa_compression_mode - Add an IAA compression mode
311  * @name: The name the compression mode will be known as
312  * @ll_table: The ll table
313  * @ll_table_size: The ll table size in bytes
314  * @d_table: The d table
315  * @d_table_size: The d table size in bytes
316  * @init: Optional callback function to init the compression mode data
317  * @free: Optional callback function to free the compression mode data
318  *
319  * Add a new IAA compression mode named @name.
320  *
321  * Returns 0 if successful, errcode otherwise.
322  */
323 int add_iaa_compression_mode(const char *name,
324 			     const u32 *ll_table,
325 			     int ll_table_size,
326 			     const u32 *d_table,
327 			     int d_table_size,
328 			     iaa_dev_comp_init_fn_t init,
329 			     iaa_dev_comp_free_fn_t free)
330 {
331 	struct iaa_compression_mode *mode;
332 	int idx, ret = -ENOMEM;
333 
334 	mutex_lock(&iaa_devices_lock);
335 
336 	if (!list_empty(&iaa_devices)) {
337 		ret = -EBUSY;
338 		goto out;
339 	}
340 
341 	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
342 	if (!mode)
343 		goto out;
344 
345 	mode->name = kstrdup(name, GFP_KERNEL);
346 	if (!mode->name)
347 		goto free;
348 
349 	if (ll_table) {
350 		mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
351 		if (!mode->ll_table)
352 			goto free;
353 		memcpy(mode->ll_table, ll_table, ll_table_size);
354 		mode->ll_table_size = ll_table_size;
355 	}
356 
357 	if (d_table) {
358 		mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
359 		if (!mode->d_table)
360 			goto free;
361 		memcpy(mode->d_table, d_table, d_table_size);
362 		mode->d_table_size = d_table_size;
363 	}
364 
365 	mode->init = init;
366 	mode->free = free;
367 
368 	idx = find_empty_iaa_compression_mode();
369 	if (idx < 0)
370 		goto free;
371 
372 	pr_debug("IAA compression mode %s added at idx %d\n",
373 		 mode->name, idx);
374 
375 	iaa_compression_modes[idx] = mode;
376 
377 	ret = 0;
378 out:
379 	mutex_unlock(&iaa_devices_lock);
380 
381 	return ret;
382 free:
383 	free_iaa_compression_mode(mode);
384 	goto out;
385 }
386 EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
387 
388 static struct iaa_device_compression_mode *
389 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
390 {
391 	return iaa_device->compression_modes[idx];
392 }
393 
394 static void free_device_compression_mode(struct iaa_device *iaa_device,
395 					 struct iaa_device_compression_mode *device_mode)
396 {
397 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
398 	struct device *dev = &iaa_device->idxd->pdev->dev;
399 
400 	kfree(device_mode->name);
401 
402 	if (device_mode->aecs_comp_table)
403 		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
404 				  device_mode->aecs_comp_table_dma_addr);
405 	kfree(device_mode);
406 }
407 
408 #define IDXD_OP_FLAG_AECS_RW_TGLS       0x400000
409 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
410 #define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
411 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
412 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
413 						IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
414 						IDXD_OP_FLAG_AECS_RW_TGLS)
415 
416 static int check_completion(struct device *dev,
417 			    struct iax_completion_record *comp,
418 			    bool compress,
419 			    bool only_once);
420 
421 static int init_device_compression_mode(struct iaa_device *iaa_device,
422 					struct iaa_compression_mode *mode,
423 					int idx, struct idxd_wq *wq)
424 {
425 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
426 	struct device *dev = &iaa_device->idxd->pdev->dev;
427 	struct iaa_device_compression_mode *device_mode;
428 	int ret = -ENOMEM;
429 
430 	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
431 	if (!device_mode)
432 		return -ENOMEM;
433 
434 	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
435 	if (!device_mode->name)
436 		goto free;
437 
438 	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
439 							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
440 	if (!device_mode->aecs_comp_table)
441 		goto free;
442 
443 	/* Add Huffman table to aecs */
444 	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
445 	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
446 	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
447 
448 	if (mode->init) {
449 		ret = mode->init(device_mode);
450 		if (ret)
451 			goto free;
452 	}
453 
454 	/* mode index should match iaa_compression_modes idx */
455 	iaa_device->compression_modes[idx] = device_mode;
456 
457 	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
458 		 mode->name, iaa_device->idxd->id);
459 
460 	ret = 0;
461 out:
462 	return ret;
463 free:
464 	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
465 		 mode->name, iaa_device->idxd->id);
466 
467 	free_device_compression_mode(iaa_device, device_mode);
468 	goto out;
469 }
470 
471 static int init_device_compression_modes(struct iaa_device *iaa_device,
472 					 struct idxd_wq *wq)
473 {
474 	struct iaa_compression_mode *mode;
475 	int i, ret = 0;
476 
477 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
478 		mode = iaa_compression_modes[i];
479 		if (!mode)
480 			continue;
481 
482 		ret = init_device_compression_mode(iaa_device, mode, i, wq);
483 		if (ret)
484 			break;
485 	}
486 
487 	return ret;
488 }
489 
490 static void remove_device_compression_modes(struct iaa_device *iaa_device)
491 {
492 	struct iaa_device_compression_mode *device_mode;
493 	int i;
494 
495 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
496 		device_mode = iaa_device->compression_modes[i];
497 		if (!device_mode)
498 			continue;
499 
500 		if (iaa_compression_modes[i]->free)
501 			iaa_compression_modes[i]->free(device_mode);
502 		free_device_compression_mode(iaa_device, device_mode);
503 		iaa_device->compression_modes[i] = NULL;
504 	}
505 }
506 
507 static struct iaa_device *iaa_device_alloc(void)
508 {
509 	struct iaa_device *iaa_device;
510 
511 	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
512 	if (!iaa_device)
513 		return NULL;
514 
515 	INIT_LIST_HEAD(&iaa_device->wqs);
516 
517 	return iaa_device;
518 }
519 
520 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
521 {
522 	struct iaa_wq *iaa_wq;
523 
524 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
525 		if (iaa_wq->wq == wq)
526 			return true;
527 	}
528 
529 	return false;
530 }
531 
532 static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
533 {
534 	struct iaa_device *iaa_device;
535 
536 	iaa_device = iaa_device_alloc();
537 	if (!iaa_device)
538 		return NULL;
539 
540 	iaa_device->idxd = idxd;
541 
542 	list_add_tail(&iaa_device->list, &iaa_devices);
543 
544 	nr_iaa++;
545 
546 	return iaa_device;
547 }
548 
549 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
550 {
551 	int ret = 0;
552 
553 	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
554 	if (ret)
555 		return ret;
556 
557 	return ret;
558 }
559 
560 static void del_iaa_device(struct iaa_device *iaa_device)
561 {
562 	list_del(&iaa_device->list);
563 
564 	nr_iaa--;
565 }
566 
567 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
568 		      struct iaa_wq **new_wq)
569 {
570 	struct idxd_device *idxd = iaa_device->idxd;
571 	struct pci_dev *pdev = idxd->pdev;
572 	struct device *dev = &pdev->dev;
573 	struct iaa_wq *iaa_wq;
574 
575 	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
576 	if (!iaa_wq)
577 		return -ENOMEM;
578 
579 	iaa_wq->wq = wq;
580 	iaa_wq->iaa_device = iaa_device;
581 	idxd_wq_set_private(wq, iaa_wq);
582 
583 	list_add_tail(&iaa_wq->list, &iaa_device->wqs);
584 
585 	iaa_device->n_wq++;
586 
587 	if (new_wq)
588 		*new_wq = iaa_wq;
589 
590 	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
591 		wq->id, iaa_device->idxd->id, iaa_device->n_wq);
592 
593 	return 0;
594 }
595 
596 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
597 {
598 	struct idxd_device *idxd = iaa_device->idxd;
599 	struct pci_dev *pdev = idxd->pdev;
600 	struct device *dev = &pdev->dev;
601 	struct iaa_wq *iaa_wq;
602 
603 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
604 		if (iaa_wq->wq == wq) {
605 			list_del(&iaa_wq->list);
606 			iaa_device->n_wq--;
607 
608 			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
609 				wq->id, iaa_device->idxd->id,
610 				iaa_device->n_wq, nr_iaa);
611 
612 			if (iaa_device->n_wq == 0)
613 				del_iaa_device(iaa_device);
614 			break;
615 		}
616 	}
617 }
618 
619 static void clear_wq_table(void)
620 {
621 	int cpu;
622 
623 	for (cpu = 0; cpu < nr_cpus; cpu++)
624 		wq_table_clear_entry(cpu);
625 
626 	pr_debug("cleared wq table\n");
627 }
628 
629 static void free_iaa_device(struct iaa_device *iaa_device)
630 {
631 	if (!iaa_device)
632 		return;
633 
634 	remove_device_compression_modes(iaa_device);
635 	kfree(iaa_device);
636 }
637 
638 static void __free_iaa_wq(struct iaa_wq *iaa_wq)
639 {
640 	struct iaa_device *iaa_device;
641 
642 	if (!iaa_wq)
643 		return;
644 
645 	iaa_device = iaa_wq->iaa_device;
646 	if (iaa_device->n_wq == 0)
647 		free_iaa_device(iaa_wq->iaa_device);
648 }
649 
650 static void free_iaa_wq(struct iaa_wq *iaa_wq)
651 {
652 	struct idxd_wq *wq;
653 
654 	__free_iaa_wq(iaa_wq);
655 
656 	wq = iaa_wq->wq;
657 
658 	kfree(iaa_wq);
659 	idxd_wq_set_private(wq, NULL);
660 }
661 
662 static int iaa_wq_get(struct idxd_wq *wq)
663 {
664 	struct idxd_device *idxd = wq->idxd;
665 	struct iaa_wq *iaa_wq;
666 	int ret = 0;
667 
668 	spin_lock(&idxd->dev_lock);
669 	iaa_wq = idxd_wq_get_private(wq);
670 	if (iaa_wq && !iaa_wq->remove) {
671 		iaa_wq->ref++;
672 		idxd_wq_get(wq);
673 	} else {
674 		ret = -ENODEV;
675 	}
676 	spin_unlock(&idxd->dev_lock);
677 
678 	return ret;
679 }
680 
681 static int iaa_wq_put(struct idxd_wq *wq)
682 {
683 	struct idxd_device *idxd = wq->idxd;
684 	struct iaa_wq *iaa_wq;
685 	bool free = false;
686 	int ret = 0;
687 
688 	spin_lock(&idxd->dev_lock);
689 	iaa_wq = idxd_wq_get_private(wq);
690 	if (iaa_wq) {
691 		iaa_wq->ref--;
692 		if (iaa_wq->ref == 0 && iaa_wq->remove) {
693 			idxd_wq_set_private(wq, NULL);
694 			free = true;
695 		}
696 		idxd_wq_put(wq);
697 	} else {
698 		ret = -ENODEV;
699 	}
700 	spin_unlock(&idxd->dev_lock);
701 	if (free) {
702 		__free_iaa_wq(iaa_wq);
703 		kfree(iaa_wq);
704 	}
705 
706 	return ret;
707 }
708 
709 static void free_wq_table(void)
710 {
711 	int cpu;
712 
713 	for (cpu = 0; cpu < nr_cpus; cpu++)
714 		wq_table_free_entry(cpu);
715 
716 	free_percpu(wq_table);
717 
718 	pr_debug("freed wq table\n");
719 }
720 
721 static int alloc_wq_table(int max_wqs)
722 {
723 	struct wq_table_entry *entry;
724 	int cpu;
725 
726 	wq_table = alloc_percpu(struct wq_table_entry);
727 	if (!wq_table)
728 		return -ENOMEM;
729 
730 	for (cpu = 0; cpu < nr_cpus; cpu++) {
731 		entry = per_cpu_ptr(wq_table, cpu);
732 		entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
733 		if (!entry->wqs) {
734 			free_wq_table();
735 			return -ENOMEM;
736 		}
737 
738 		entry->max_wqs = max_wqs;
739 	}
740 
741 	pr_debug("initialized wq table\n");
742 
743 	return 0;
744 }
745 
746 static int save_iaa_wq(struct idxd_wq *wq)
747 {
748 	struct iaa_device *iaa_device, *found = NULL;
749 	struct idxd_device *idxd;
750 	struct pci_dev *pdev;
751 	struct device *dev;
752 	int ret = 0;
753 
754 	list_for_each_entry(iaa_device, &iaa_devices, list) {
755 		if (iaa_device->idxd == wq->idxd) {
756 			idxd = iaa_device->idxd;
757 			pdev = idxd->pdev;
758 			dev = &pdev->dev;
759 			/*
760 			 * Check to see that we don't already have this wq.
761 			 * Shouldn't happen but we don't control probing.
762 			 */
763 			if (iaa_has_wq(iaa_device, wq)) {
764 				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
765 					iaa_device);
766 				goto out;
767 			}
768 
769 			found = iaa_device;
770 
771 			ret = add_iaa_wq(iaa_device, wq, NULL);
772 			if (ret)
773 				goto out;
774 
775 			break;
776 		}
777 	}
778 
779 	if (!found) {
780 		struct iaa_device *new_device;
781 		struct iaa_wq *new_wq;
782 
783 		new_device = add_iaa_device(wq->idxd);
784 		if (!new_device) {
785 			ret = -ENOMEM;
786 			goto out;
787 		}
788 
789 		ret = add_iaa_wq(new_device, wq, &new_wq);
790 		if (ret) {
791 			del_iaa_device(new_device);
792 			free_iaa_device(new_device);
793 			goto out;
794 		}
795 
796 		ret = init_iaa_device(new_device, new_wq);
797 		if (ret) {
798 			del_iaa_wq(new_device, new_wq->wq);
799 			del_iaa_device(new_device);
800 			free_iaa_wq(new_wq);
801 			goto out;
802 		}
803 	}
804 
805 	if (WARN_ON(nr_iaa == 0))
806 		return -EINVAL;
807 
808 	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
809 	if (!cpus_per_iaa)
810 		cpus_per_iaa = 1;
811 out:
812 	return ret;
813 }
814 
815 static void remove_iaa_wq(struct idxd_wq *wq)
816 {
817 	struct iaa_device *iaa_device;
818 
819 	list_for_each_entry(iaa_device, &iaa_devices, list) {
820 		if (iaa_has_wq(iaa_device, wq)) {
821 			del_iaa_wq(iaa_device, wq);
822 			break;
823 		}
824 	}
825 
826 	if (nr_iaa) {
827 		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
828 		if (!cpus_per_iaa)
829 			cpus_per_iaa = 1;
830 	} else
831 		cpus_per_iaa = 1;
832 }
833 
834 static int wq_table_add_wqs(int iaa, int cpu)
835 {
836 	struct iaa_device *iaa_device, *found_device = NULL;
837 	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
838 	struct idxd_device *idxd;
839 	struct iaa_wq *iaa_wq;
840 	struct pci_dev *pdev;
841 	struct device *dev;
842 
843 	list_for_each_entry(iaa_device, &iaa_devices, list) {
844 		idxd = iaa_device->idxd;
845 		pdev = idxd->pdev;
846 		dev = &pdev->dev;
847 
848 		if (cur_iaa != iaa) {
849 			cur_iaa++;
850 			continue;
851 		}
852 
853 		found_device = iaa_device;
854 		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
855 			found_device->idxd->id, cur_iaa);
856 		break;
857 	}
858 
859 	if (!found_device) {
860 		found_device = list_first_entry_or_null(&iaa_devices,
861 							struct iaa_device, list);
862 		if (!found_device) {
863 			pr_debug("couldn't find any iaa devices with wqs!\n");
864 			ret = -EINVAL;
865 			goto out;
866 		}
867 		cur_iaa = 0;
868 
869 		idxd = found_device->idxd;
870 		pdev = idxd->pdev;
871 		dev = &pdev->dev;
872 		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
873 			found_device->idxd->id, cur_iaa);
874 	}
875 
876 	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
877 		wq_table_add(cpu, iaa_wq->wq);
878 		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
879 			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
880 		n_wqs_added++;
881 	}
882 
883 	if (!n_wqs_added) {
884 		pr_debug("couldn't find any iaa wqs!\n");
885 		ret = -EINVAL;
886 		goto out;
887 	}
888 out:
889 	return ret;
890 }
891 
892 /*
893  * Rebalance the wq table so that given a cpu, it's easy to find the
894  * closest IAA instance.  The idea is to try to choose the most
895  * appropriate IAA instance for a caller and spread available
896  * workqueues around to clients.
897  */
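/*
 * Worked example (hypothetical topology): with 2 nodes of 64 cpus
 * each and 4 IAA instances, cpus_per_iaa = (2 * 64) / 4 = 32, so
 * within each node the first 32 cpus get the wqs of one IAA instance
 * and the next 32 cpus the wqs of the next, giving each instance a
 * contiguous, roughly equal share of cpus.
 */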
898 static void rebalance_wq_table(void)
899 {
900 	const struct cpumask *node_cpus;
901 	int node, cpu, iaa = -1;
902 
903 	if (nr_iaa == 0)
904 		return;
905 
906 	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
907 		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
908 
909 	clear_wq_table();
910 
911 	if (nr_iaa == 1) {
912 		for (cpu = 0; cpu < nr_cpus; cpu++) {
913 			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
914 				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
915 				return;
916 			}
917 		}
918 
919 		return;
920 	}
921 
922 	for_each_node_with_cpus(node) {
923 		node_cpus = cpumask_of_node(node);
924 
925 		for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
926 			int node_cpu = cpumask_nth(cpu, node_cpus);
927 
928 			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
929 				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
930 				return;
931 			}
932 
933 			if ((cpu % cpus_per_iaa) == 0)
934 				iaa++;
935 
936 			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
937 				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
938 				return;
939 			}
940 		}
941 	}
942 }
943 
944 static inline int check_completion(struct device *dev,
945 				   struct iax_completion_record *comp,
946 				   bool compress,
947 				   bool only_once)
948 {
949 	char *op_str = compress ? "compress" : "decompress";
950 	int ret = 0;
951 
952 	while (!comp->status) {
953 		if (only_once)
954 			return -EAGAIN;
955 		cpu_relax();
956 	}
957 
958 	if (comp->status != IAX_COMP_SUCCESS) {
959 		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
960 			ret = -ETIMEDOUT;
961 			dev_dbg(dev, "%s timed out, size=0x%x\n",
962 				op_str, comp->output_size);
963 			update_completion_timeout_errs();
964 			goto out;
965 		}
966 
967 		if (comp->status == IAA_ANALYTICS_ERROR &&
968 		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
969 			ret = -E2BIG;
970 			dev_dbg(dev, "compressed > uncompressed size,"
971 				" not compressing, size=0x%x\n",
972 				comp->output_size);
973 			update_completion_comp_buf_overflow_errs();
974 			goto out;
975 		}
976 
977 		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
978 			ret = -EOVERFLOW;
979 			goto out;
980 		}
981 
982 		ret = -EINVAL;
983 		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
984 			op_str, comp->status, comp->error_code, comp->output_size);
985 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
986 		update_completion_einval_errs();
987 
988 		goto out;
989 	}
990 out:
991 	return ret;
992 }
993 
994 static int deflate_generic_decompress(struct acomp_req *req)
995 {
996 	void *src, *dst;
997 	int ret;
998 
999 	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1000 	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1001 
1002 	ret = crypto_comp_decompress(deflate_generic_tfm,
1003 				     src, req->slen, dst, &req->dlen);
1004 
1005 	kunmap_local(src);
1006 	kunmap_local(dst);
1007 
1008 	update_total_sw_decomp_calls();
1009 
1010 	return ret;
1011 }
1012 
1013 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1014 				struct acomp_req *req,
1015 				dma_addr_t *src_addr, dma_addr_t *dst_addr);
1016 
1017 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1018 			       struct idxd_wq *wq,
1019 			       dma_addr_t src_addr, unsigned int slen,
1020 			       dma_addr_t dst_addr, unsigned int *dlen,
1021 			       u32 compression_crc);
1022 
1023 static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1024 			      enum idxd_complete_type comp_type,
1025 			      bool free_desc, void *__ctx,
1026 			      u32 *status)
1027 {
1028 	struct iaa_device_compression_mode *active_compression_mode;
1029 	struct iaa_compression_ctx *compression_ctx;
1030 	struct crypto_ctx *ctx = __ctx;
1031 	struct iaa_device *iaa_device;
1032 	struct idxd_device *idxd;
1033 	struct iaa_wq *iaa_wq;
1034 	struct pci_dev *pdev;
1035 	struct device *dev;
1036 	int ret, err = 0;
1037 
1038 	compression_ctx = crypto_tfm_ctx(ctx->tfm);
1039 
1040 	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1041 	iaa_device = iaa_wq->iaa_device;
1042 	idxd = iaa_device->idxd;
1043 	pdev = idxd->pdev;
1044 	dev = &pdev->dev;
1045 
1046 	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1047 								  compression_ctx->mode);
1048 	dev_dbg(dev, "%s: compression mode %s,"
1049 		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1050 		active_compression_mode->name,
1051 		ctx->src_addr, ctx->dst_addr);
1052 
1053 	ret = check_completion(dev, idxd_desc->iax_completion,
1054 			       ctx->compress, false);
1055 	if (ret) {
1056 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1057 		if (!ctx->compress &&
1058 		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1059 			pr_warn("%s: falling back to deflate-generic decompress, "
1060 				"analytics error code %x\n", __func__,
1061 				idxd_desc->iax_completion->error_code);
1062 			ret = deflate_generic_decompress(ctx->req);
1063 			if (ret) {
1064 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1065 					__func__, ret);
1066 				err = -EIO;
1067 				goto err;
1068 			}
1069 		} else {
1070 			err = -EIO;
1071 			goto err;
1072 		}
1073 	} else {
1074 		ctx->req->dlen = idxd_desc->iax_completion->output_size;
1075 	}
1076 
1077 	/* Update stats */
1078 	if (ctx->compress) {
1079 		update_total_comp_bytes_out(ctx->req->dlen);
1080 		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1081 	} else {
1082 		update_total_decomp_bytes_in(ctx->req->dlen);
1083 		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
1084 	}
1085 
1086 	if (ctx->compress && compression_ctx->verify_compress) {
1087 		dma_addr_t src_addr, dst_addr;
1088 		u32 compression_crc;
1089 
1090 		compression_crc = idxd_desc->iax_completion->crc;
1091 
1092 		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1093 		if (ret) {
1094 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1095 			err = -EIO;
1096 			goto out;
1097 		}
1098 
1099 		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1100 					  ctx->req->slen, dst_addr, &ctx->req->dlen,
1101 					  compression_crc);
1102 		if (ret) {
1103 			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1104 			err = -EIO;
1105 		}
1106 
1107 		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1108 		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1109 
1110 		goto out;
1111 	}
1112 err:
1113 	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1114 	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1115 out:
1116 	if (ret != 0)
1117 		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1118 
1119 	if (ctx->req->base.complete)
1120 		acomp_request_complete(ctx->req, err);
1121 
1122 	if (free_desc)
1123 		idxd_free_desc(idxd_desc->wq, idxd_desc);
1124 	iaa_wq_put(idxd_desc->wq);
1125 }
1126 
1127 static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
1128 			struct idxd_wq *wq,
1129 			dma_addr_t src_addr, unsigned int slen,
1130 			dma_addr_t dst_addr, unsigned int *dlen,
1131 			u32 *compression_crc,
1132 			bool disable_async)
1133 {
1134 	struct iaa_device_compression_mode *active_compression_mode;
1135 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1136 	struct iaa_device *iaa_device;
1137 	struct idxd_desc *idxd_desc;
1138 	struct iax_hw_desc *desc;
1139 	struct idxd_device *idxd;
1140 	struct iaa_wq *iaa_wq;
1141 	struct pci_dev *pdev;
1142 	struct device *dev;
1143 	int ret = 0;
1144 
1145 	iaa_wq = idxd_wq_get_private(wq);
1146 	iaa_device = iaa_wq->iaa_device;
1147 	idxd = iaa_device->idxd;
1148 	pdev = idxd->pdev;
1149 	dev = &pdev->dev;
1150 
1151 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1152 
1153 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1154 	if (IS_ERR(idxd_desc)) {
1155 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1156 		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1157 		return PTR_ERR(idxd_desc);
1158 	}
1159 	desc = idxd_desc->iax_hw;
1160 
1161 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1162 		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1163 	desc->opcode = IAX_OPCODE_COMPRESS;
1164 	desc->compr_flags = IAA_COMP_FLAGS;
1165 	desc->priv = 0;
1166 
1167 	desc->src1_addr = (u64)src_addr;
1168 	desc->src1_size = slen;
1169 	desc->dst_addr = (u64)dst_addr;
1170 	desc->max_dst_size = *dlen;
1171 	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1172 	desc->src2_size = sizeof(struct aecs_comp_table_record);
1173 	desc->completion_addr = idxd_desc->compl_dma;
1174 
1175 	if (ctx->use_irq && !disable_async) {
1176 		desc->flags |= IDXD_OP_FLAG_RCI;
1177 
1178 		idxd_desc->crypto.req = req;
1179 		idxd_desc->crypto.tfm = tfm;
1180 		idxd_desc->crypto.src_addr = src_addr;
1181 		idxd_desc->crypto.dst_addr = dst_addr;
1182 		idxd_desc->crypto.compress = true;
1183 
1184 		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1185 			" src_addr %llx, dst_addr %llx\n", __func__,
1186 			active_compression_mode->name,
1187 			src_addr, dst_addr);
1188 	} else if (ctx->async_mode && !disable_async)
1189 		req->base.data = idxd_desc;
1190 
1191 	dev_dbg(dev, "%s: compression mode %s,"
1192 		" desc->src1_addr %llx, desc->src1_size %d,"
1193 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1194 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1195 		active_compression_mode->name,
1196 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1197 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1198 
1199 	ret = idxd_submit_desc(wq, idxd_desc);
1200 	if (ret) {
1201 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1202 		goto err;
1203 	}
1204 
1205 	/* Update stats */
1206 	update_total_comp_calls();
1207 	update_wq_comp_calls(wq);
1208 
1209 	if (ctx->async_mode && !disable_async) {
1210 		ret = -EINPROGRESS;
1211 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1212 		goto out;
1213 	}
1214 
1215 	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1216 	if (ret) {
1217 		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1218 		goto err;
1219 	}
1220 
1221 	*dlen = idxd_desc->iax_completion->output_size;
1222 
1223 	/* Update stats */
1224 	update_total_comp_bytes_out(*dlen);
1225 	update_wq_comp_bytes(wq, *dlen);
1226 
1227 	*compression_crc = idxd_desc->iax_completion->crc;
1228 
1229 	if (!ctx->async_mode || disable_async)
1230 		idxd_free_desc(wq, idxd_desc);
1231 out:
1232 	return ret;
1233 err:
1234 	idxd_free_desc(wq, idxd_desc);
1235 	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1236 
1237 	goto out;
1238 }
1239 
1240 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1241 				struct acomp_req *req,
1242 				dma_addr_t *src_addr, dma_addr_t *dst_addr)
1243 {
1244 	int ret = 0;
1245 	int nr_sgs;
1246 
1247 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1248 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1249 
1250 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1251 	if (nr_sgs <= 0 || nr_sgs > 1) {
1252 		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1253 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1254 			iaa_wq->wq->id, ret);
1255 		ret = -EIO;
1256 		goto out;
1257 	}
1258 	*src_addr = sg_dma_address(req->src);
1259 	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1260 		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1261 		req->src, req->slen, sg_dma_len(req->src));
1262 
1263 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1264 	if (nr_sgs <= 0 || nr_sgs > 1) {
1265 		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1266 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1267 			iaa_wq->wq->id, ret);
1268 		ret = -EIO;
1269 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1270 		goto out;
1271 	}
1272 	*dst_addr = sg_dma_address(req->dst);
1273 	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1274 		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1275 		req->dst, req->dlen, sg_dma_len(req->dst));
1276 out:
1277 	return ret;
1278 }
1279 
1280 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1281 			       struct idxd_wq *wq,
1282 			       dma_addr_t src_addr, unsigned int slen,
1283 			       dma_addr_t dst_addr, unsigned int *dlen,
1284 			       u32 compression_crc)
1285 {
1286 	struct iaa_device_compression_mode *active_compression_mode;
1287 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1288 	struct iaa_device *iaa_device;
1289 	struct idxd_desc *idxd_desc;
1290 	struct iax_hw_desc *desc;
1291 	struct idxd_device *idxd;
1292 	struct iaa_wq *iaa_wq;
1293 	struct pci_dev *pdev;
1294 	struct device *dev;
1295 	int ret = 0;
1296 
1297 	iaa_wq = idxd_wq_get_private(wq);
1298 	iaa_device = iaa_wq->iaa_device;
1299 	idxd = iaa_device->idxd;
1300 	pdev = idxd->pdev;
1301 	dev = &pdev->dev;
1302 
1303 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1304 
1305 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1306 	if (IS_ERR(idxd_desc)) {
1307 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1308 		dev_dbg(dev, "iaa compress verify failed: ret=%ld\n",
1309 			PTR_ERR(idxd_desc));
1310 		return PTR_ERR(idxd_desc);
1311 	}
1312 	desc = idxd_desc->iax_hw;
1313 
1314 	/* Verify (optional) - decompress and check crc, suppress dest write */
1315 
1316 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1317 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1318 	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1319 	desc->priv = 0;
1320 
1321 	desc->src1_addr = (u64)dst_addr;
1322 	desc->src1_size = *dlen;
1323 	desc->dst_addr = (u64)src_addr;
1324 	desc->max_dst_size = slen;
1325 	desc->completion_addr = idxd_desc->compl_dma;
1326 
1327 	dev_dbg(dev, "(verify) compression mode %s,"
1328 		" desc->src1_addr %llx, desc->src1_size %d,"
1329 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1330 		" desc->src2_addr %llx, desc->src2_size %d\n",
1331 		active_compression_mode->name,
1332 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1333 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1334 
1335 	ret = idxd_submit_desc(wq, idxd_desc);
1336 	if (ret) {
1337 		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1338 		goto err;
1339 	}
1340 
1341 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1342 	if (ret) {
1343 		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1344 		goto err;
1345 	}
1346 
1347 	if (compression_crc != idxd_desc->iax_completion->crc) {
1348 		ret = -EINVAL;
1349 		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1350 			" comp=0x%x, decomp=0x%x\n", compression_crc,
1351 			idxd_desc->iax_completion->crc);
1352 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1353 			       8, 1, idxd_desc->iax_completion, 64, 0);
1354 		goto err;
1355 	}
1356 
1357 	idxd_free_desc(wq, idxd_desc);
1358 out:
1359 	return ret;
1360 err:
1361 	idxd_free_desc(wq, idxd_desc);
1362 	dev_dbg(dev, "iaa compress verify failed: ret=%d\n", ret);
1363 
1364 	goto out;
1365 }
1366 
1367 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1368 			  struct idxd_wq *wq,
1369 			  dma_addr_t src_addr, unsigned int slen,
1370 			  dma_addr_t dst_addr, unsigned int *dlen,
1371 			  bool disable_async)
1372 {
1373 	struct iaa_device_compression_mode *active_compression_mode;
1374 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1375 	struct iaa_device *iaa_device;
1376 	struct idxd_desc *idxd_desc;
1377 	struct iax_hw_desc *desc;
1378 	struct idxd_device *idxd;
1379 	struct iaa_wq *iaa_wq;
1380 	struct pci_dev *pdev;
1381 	struct device *dev;
1382 	int ret = 0;
1383 
1384 	iaa_wq = idxd_wq_get_private(wq);
1385 	iaa_device = iaa_wq->iaa_device;
1386 	idxd = iaa_device->idxd;
1387 	pdev = idxd->pdev;
1388 	dev = &pdev->dev;
1389 
1390 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1391 
1392 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1393 	if (IS_ERR(idxd_desc)) {
1394 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1395 		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1396 			PTR_ERR(idxd_desc));
1397 		return PTR_ERR(idxd_desc);
1398 	}
1399 	desc = idxd_desc->iax_hw;
1400 
1401 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1402 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1403 	desc->max_dst_size = PAGE_SIZE;
1404 	desc->decompr_flags = IAA_DECOMP_FLAGS;
1405 	desc->priv = 0;
1406 
1407 	desc->src1_addr = (u64)src_addr;
1408 	desc->dst_addr = (u64)dst_addr;
1409 	desc->max_dst_size = *dlen;
1410 	desc->src1_size = slen;
1411 	desc->completion_addr = idxd_desc->compl_dma;
1412 
1413 	if (ctx->use_irq && !disable_async) {
1414 		desc->flags |= IDXD_OP_FLAG_RCI;
1415 
1416 		idxd_desc->crypto.req = req;
1417 		idxd_desc->crypto.tfm = tfm;
1418 		idxd_desc->crypto.src_addr = src_addr;
1419 		idxd_desc->crypto.dst_addr = dst_addr;
1420 		idxd_desc->crypto.compress = false;
1421 
1422 		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1423 			" src_addr %llx, dst_addr %llx\n", __func__,
1424 			active_compression_mode->name,
1425 			src_addr, dst_addr);
1426 	} else if (ctx->async_mode && !disable_async)
1427 		req->base.data = idxd_desc;
1428 
1429 	dev_dbg(dev, "%s: decompression mode %s,"
1430 		" desc->src1_addr %llx, desc->src1_size %d,"
1431 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1432 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1433 		active_compression_mode->name,
1434 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1435 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1436 
1437 	ret = idxd_submit_desc(wq, idxd_desc);
1438 	if (ret) {
1439 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1440 		goto err;
1441 	}
1442 
1443 	/* Update stats */
1444 	update_total_decomp_calls();
1445 	update_wq_decomp_calls(wq);
1446 
1447 	if (ctx->async_mode && !disable_async) {
1448 		ret = -EINPROGRESS;
1449 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1450 		goto out;
1451 	}
1452 
1453 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1454 	if (ret) {
1455 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1456 		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1457 			pr_warn("%s: falling back to deflate-generic decompress, "
1458 				"analytics error code %x\n", __func__,
1459 				idxd_desc->iax_completion->error_code);
1460 			ret = deflate_generic_decompress(req);
1461 			if (ret) {
1462 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1463 					__func__, ret);
1464 				goto err;
1465 			}
1466 		} else {
1467 			goto err;
1468 		}
1469 	} else {
1470 		req->dlen = idxd_desc->iax_completion->output_size;
1471 	}
1472 
1473 	*dlen = req->dlen;
1474 
1475 	if (!ctx->async_mode || disable_async)
1476 		idxd_free_desc(wq, idxd_desc);
1477 
1478 	/* Update stats */
1479 	update_total_decomp_bytes_in(slen);
1480 	update_wq_decomp_bytes(wq, slen);
1481 out:
1482 	return ret;
1483 err:
1484 	idxd_free_desc(wq, idxd_desc);
1485 	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1486 
1487 	goto out;
1488 }
1489 
1490 static int iaa_comp_acompress(struct acomp_req *req)
1491 {
1492 	struct iaa_compression_ctx *compression_ctx;
1493 	struct crypto_tfm *tfm = req->base.tfm;
1494 	dma_addr_t src_addr, dst_addr;
1495 	bool disable_async = false;
1496 	int nr_sgs, cpu, ret = 0;
1497 	struct iaa_wq *iaa_wq;
1498 	u32 compression_crc;
1499 	struct idxd_wq *wq;
1500 	struct device *dev;
1501 	u64 start_time_ns;
1502 	int order = -1;
1503 
1504 	compression_ctx = crypto_tfm_ctx(tfm);
1505 
1506 	if (!iaa_crypto_enabled) {
1507 		pr_debug("iaa_crypto disabled, not compressing\n");
1508 		return -ENODEV;
1509 	}
1510 
1511 	if (!req->src || !req->slen) {
1512 		pr_debug("invalid src, not compressing\n");
1513 		return -EINVAL;
1514 	}
1515 
1516 	cpu = get_cpu();
1517 	wq = wq_table_next_wq(cpu);
1518 	put_cpu();
1519 	if (!wq) {
1520 		pr_debug("no wq configured for cpu=%d\n", cpu);
1521 		return -ENODEV;
1522 	}
1523 
1524 	ret = iaa_wq_get(wq);
1525 	if (ret) {
1526 		pr_debug("no wq available for cpu=%d\n", cpu);
1527 		return -ENODEV;
1528 	}
1529 
1530 	iaa_wq = idxd_wq_get_private(wq);
1531 
1532 	if (!req->dst) {
1533 		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1534 
1535 		/* incompressible data will always be < 2 * slen */
1536 		req->dlen = 2 * req->slen;
1537 		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1538 		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1539 		if (!req->dst) {
1540 			ret = -ENOMEM;
1541 			order = -1;
1542 			goto out;
1543 		}
1544 		disable_async = true;
1545 	}
1546 
1547 	dev = &wq->idxd->pdev->dev;
1548 
1549 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1550 	if (nr_sgs <= 0 || nr_sgs > 1) {
1551 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1552 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1553 			iaa_wq->wq->id, ret);
1554 		ret = -EIO;
1555 		goto out;
1556 	}
1557 	src_addr = sg_dma_address(req->src);
1558 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1559 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1560 		req->src, req->slen, sg_dma_len(req->src));
1561 
1562 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1563 	if (nr_sgs <= 0 || nr_sgs > 1) {
1564 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1565 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1566 			iaa_wq->wq->id, ret);
1567 		ret = -EIO;
1568 		goto err_map_dst;
1569 	}
1570 	dst_addr = sg_dma_address(req->dst);
1571 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1572 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1573 		req->dst, req->dlen, sg_dma_len(req->dst));
1574 
1575 	start_time_ns = iaa_get_ts();
1576 	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1577 			   &req->dlen, &compression_crc, disable_async);
1578 	update_max_comp_delay_ns(start_time_ns);
1579 	if (ret == -EINPROGRESS)
1580 		return ret;
1581 
1582 	if (!ret && compression_ctx->verify_compress) {
1583 		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1584 		if (ret) {
1585 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1586 			goto out;
1587 		}
1588 
1589 		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1590 					  dst_addr, &req->dlen, compression_crc);
1591 		if (ret)
1592 			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1593 
1594 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1595 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1596 
1597 		goto out;
1598 	}
1599 
1600 	if (ret)
1601 		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1602 
1603 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1604 err_map_dst:
1605 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1606 out:
1607 	iaa_wq_put(wq);
1608 
1609 	if (order >= 0)
1610 		sgl_free_order(req->dst, order);
1611 
1612 	return ret;
1613 }
1614 
1615 static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1616 {
1617 	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1618 		GFP_KERNEL : GFP_ATOMIC;
1619 	struct crypto_tfm *tfm = req->base.tfm;
1620 	dma_addr_t src_addr, dst_addr;
1621 	int nr_sgs, cpu, ret = 0;
1622 	struct iaa_wq *iaa_wq;
1623 	struct device *dev;
1624 	struct idxd_wq *wq;
1625 	u64 start_time_ns;
1626 	int order = -1;
1627 
1628 	cpu = get_cpu();
1629 	wq = wq_table_next_wq(cpu);
1630 	put_cpu();
1631 	if (!wq) {
1632 		pr_debug("no wq configured for cpu=%d\n", cpu);
1633 		return -ENODEV;
1634 	}
1635 
1636 	ret = iaa_wq_get(wq);
1637 	if (ret) {
1638 		pr_debug("no wq available for cpu=%d\n", cpu);
1639 		return -ENODEV;
1640 	}
1641 
1642 	iaa_wq = idxd_wq_get_private(wq);
1643 
1644 	dev = &wq->idxd->pdev->dev;
1645 
1646 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1647 	if (nr_sgs <= 0 || nr_sgs > 1) {
1648 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1649 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1650 			iaa_wq->wq->id, ret);
1651 		ret = -EIO;
1652 		goto out;
1653 	}
1654 	src_addr = sg_dma_address(req->src);
1655 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1656 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1657 		req->src, req->slen, sg_dma_len(req->src));
1658 
1659 	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1660 alloc_dest:
1661 	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1662 	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1663 	if (!req->dst) {
1664 		ret = -ENOMEM;
1665 		order = -1;
1666 		goto out;
1667 	}
1668 
1669 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1670 	if (nr_sgs <= 0 || nr_sgs > 1) {
1671 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1672 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1673 			iaa_wq->wq->id, ret);
1674 		ret = -EIO;
1675 		goto err_map_dst;
1676 	}
1677 
1678 	dst_addr = sg_dma_address(req->dst);
1679 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1680 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1681 		req->dst, req->dlen, sg_dma_len(req->dst));
1682 	start_time_ns = iaa_get_ts();
1683 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1684 			     dst_addr, &req->dlen, true);
1685 	update_max_decomp_delay_ns(start_time_ns);
1686 	if (ret == -EOVERFLOW) {
1687 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1688 		req->dlen *= 2;
1689 		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1690 			goto err_map_dst;
1691 		goto alloc_dest;
1692 	}
1693 
1694 	if (ret != 0)
1695 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1696 
1697 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1698 err_map_dst:
1699 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1700 out:
1701 	iaa_wq_put(wq);
1702 
1703 	if (order >= 0)
1704 		sgl_free_order(req->dst, order);
1705 
1706 	return ret;
1707 }
1708 
1709 static int iaa_comp_adecompress(struct acomp_req *req)
1710 {
1711 	struct crypto_tfm *tfm = req->base.tfm;
1712 	dma_addr_t src_addr, dst_addr;
1713 	int nr_sgs, cpu, ret = 0;
1714 	struct iaa_wq *iaa_wq;
1715 	struct device *dev;
1716 	u64 start_time_ns;
1717 	struct idxd_wq *wq;
1718 
1719 	if (!iaa_crypto_enabled) {
1720 		pr_debug("iaa_crypto disabled, not decompressing\n");
1721 		return -ENODEV;
1722 	}
1723 
1724 	if (!req->src || !req->slen) {
1725 		pr_debug("invalid src, not decompressing\n");
1726 		return -EINVAL;
1727 	}
1728 
1729 	if (!req->dst)
1730 		return iaa_comp_adecompress_alloc_dest(req);
1731 
1732 	cpu = get_cpu();
1733 	wq = wq_table_next_wq(cpu);
1734 	put_cpu();
1735 	if (!wq) {
1736 		pr_debug("no wq configured for cpu=%d\n", cpu);
1737 		return -ENODEV;
1738 	}
1739 
1740 	ret = iaa_wq_get(wq);
1741 	if (ret) {
1742 		pr_debug("no wq available for cpu=%d\n", cpu);
1743 		return -ENODEV;
1744 	}
1745 
1746 	iaa_wq = idxd_wq_get_private(wq);
1747 
1748 	dev = &wq->idxd->pdev->dev;
1749 
1750 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1751 	if (nr_sgs <= 0 || nr_sgs > 1) {
1752 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1753 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1754 			iaa_wq->wq->id, ret);
1755 		ret = -EIO;
1756 		goto out;
1757 	}
1758 	src_addr = sg_dma_address(req->src);
1759 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1760 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1761 		req->src, req->slen, sg_dma_len(req->src));
1762 
1763 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1764 	if (nr_sgs <= 0 || nr_sgs > 1) {
1765 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1766 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1767 			iaa_wq->wq->id, ret);
1768 		ret = -EIO;
1769 		goto err_map_dst;
1770 	}
1771 	dst_addr = sg_dma_address(req->dst);
1772 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1773 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1774 		req->dst, req->dlen, sg_dma_len(req->dst));
1775 
1776 	start_time_ns = iaa_get_ts();
1777 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1778 			     dst_addr, &req->dlen, false);
1779 	update_max_decomp_delay_ns(start_time_ns);
1780 	if (ret == -EINPROGRESS)
1781 		return ret;
1782 
1783 	if (ret != 0)
1784 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1785 
1786 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1787 err_map_dst:
1788 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1789 out:
1790 	iaa_wq_put(wq);
1791 
1792 	return ret;
1793 }
1794 
1795 static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1796 {
1797 	ctx->verify_compress = iaa_verify_compress;
1798 	ctx->async_mode = async_mode;
1799 	ctx->use_irq = use_irq;
1800 }
1801 
1802 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1803 {
1804 	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1805 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1806 
1807 	compression_ctx_init(ctx);
1808 
1809 	ctx->mode = IAA_MODE_FIXED;
1810 
1811 	return 0;
1812 }
1813 
1814 static void dst_free(struct scatterlist *sgl)
1815 {
1816 	/*
1817 	 * Called for driver-allocated (req->dst == NULL) requests; the dst
1818 	 * buffer is freed elsewhere via sgl_free_order(), so this is a no-op.
1819 	 */
1820 }
1821 
1822 static struct acomp_alg iaa_acomp_fixed_deflate = {
1823 	.init			= iaa_comp_init_fixed,
1824 	.compress		= iaa_comp_acompress,
1825 	.decompress		= iaa_comp_adecompress,
1826 	.dst_free               = dst_free,
1827 	.base			= {
1828 		.cra_name		= "deflate",
1829 		.cra_driver_name	= "deflate-iaa",
1830 		.cra_flags		= CRYPTO_ALG_ASYNC,
1831 		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
1832 		.cra_module		= THIS_MODULE,
1833 		.cra_priority		= IAA_ALG_PRIORITY,
1834 	}
1835 };
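
/*
 * Usage sketch for kernel consumers (illustrative only; error
 * handling trimmed): the fixed-mode algorithm above can be requested
 * explicitly by driver name:
 *
 *	struct crypto_acomp *tfm;
 *
 *	tfm = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	if (IS_ERR(tfm))
 *		return PTR_ERR(tfm);
 *
 * or it may be selected as "deflate" when its priority is the
 * highest registered for that cra_name.
 */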
1836 
1837 static int iaa_register_compression_device(void)
1838 {
1839 	int ret;
1840 
1841 	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1842 	if (ret) {
1843 		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1844 		goto out;
1845 	}
1846 
1847 	iaa_crypto_registered = true;
1848 out:
1849 	return ret;
1850 }
1851 
1852 static int iaa_unregister_compression_device(void)
1853 {
1854 	if (iaa_crypto_registered)
1855 		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1856 
1857 	return 0;
1858 }
1859 
1860 static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1861 {
1862 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1863 	struct idxd_device *idxd = wq->idxd;
1864 	struct idxd_driver_data *data = idxd->data;
1865 	struct device *dev = &idxd_dev->conf_dev;
1866 	bool first_wq = false;
1867 	int ret = 0;
1868 
1869 	if (idxd->state != IDXD_DEV_ENABLED)
1870 		return -ENXIO;
1871 
1872 	if (data->type != IDXD_TYPE_IAX)
1873 		return -ENODEV;
1874 
1875 	mutex_lock(&wq->wq_lock);
1876 
1877 	if (idxd_wq_get_private(wq)) {
1878 		mutex_unlock(&wq->wq_lock);
1879 		return -EBUSY;
1880 	}
1881 
1882 	if (!idxd_wq_driver_name_match(wq, dev)) {
1883 		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1884 			idxd->id, wq->id, wq->driver_name, dev->driver->name);
1885 		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1886 		ret = -ENODEV;
1887 		goto err;
1888 	}
1889 
1890 	wq->type = IDXD_WQT_KERNEL;
1891 
1892 	ret = idxd_drv_enable_wq(wq);
1893 	if (ret < 0) {
1894 		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1895 			idxd->id, wq->id, ret);
1896 		ret = -ENXIO;
1897 		goto err;
1898 	}
1899 
1900 	mutex_lock(&iaa_devices_lock);
1901 
1902 	if (list_empty(&iaa_devices)) {
1903 		ret = alloc_wq_table(wq->idxd->max_wqs);
1904 		if (ret)
1905 			goto err_alloc;
1906 		first_wq = true;
1907 	}
1908 
1909 	ret = save_iaa_wq(wq);
1910 	if (ret)
1911 		goto err_save;
1912 
1913 	rebalance_wq_table();
1914 
1915 	if (first_wq) {
1916 		iaa_crypto_enabled = true;
1917 		ret = iaa_register_compression_device();
1918 		if (ret != 0) {
1919 			iaa_crypto_enabled = false;
1920 			dev_dbg(dev, "IAA compression device registration failed\n");
1921 			goto err_register;
1922 		}
1923 		try_module_get(THIS_MODULE);
1924 
1925 		pr_info("iaa_crypto now ENABLED\n");
1926 	}
1927 
1928 	mutex_unlock(&iaa_devices_lock);
1929 out:
1930 	mutex_unlock(&wq->wq_lock);
1931 
1932 	return ret;
1933 
1934 err_register:
1935 	remove_iaa_wq(wq);
1936 	free_iaa_wq(idxd_wq_get_private(wq));
1937 err_save:
1938 	if (first_wq)
1939 		free_wq_table();
1940 err_alloc:
1941 	mutex_unlock(&iaa_devices_lock);
1942 	idxd_drv_disable_wq(wq);
1943 err:
1944 	wq->type = IDXD_WQT_NONE;
1945 
1946 	goto out;
1947 }
1948 
1949 static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
1950 {
1951 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1952 	struct idxd_device *idxd = wq->idxd;
1953 	struct iaa_wq *iaa_wq;
1954 	bool free = false;
1955 
1956 	idxd_wq_quiesce(wq);
1957 
1958 	mutex_lock(&wq->wq_lock);
1959 	mutex_lock(&iaa_devices_lock);
1960 
1961 	remove_iaa_wq(wq);
1962 
1963 	spin_lock(&idxd->dev_lock);
1964 	iaa_wq = idxd_wq_get_private(wq);
1965 	if (!iaa_wq) {
1966 		spin_unlock(&idxd->dev_lock);
1967 		pr_err("%s: no iaa_wq available to remove\n", __func__);
1968 		goto out;
1969 	}
1970 
1971 	if (iaa_wq->ref) {
1972 		iaa_wq->remove = true;
1973 	} else {
1974 		wq = iaa_wq->wq;
1975 		idxd_wq_set_private(wq, NULL);
1976 		free = true;
1977 	}
1978 	spin_unlock(&idxd->dev_lock);
1979 	if (free) {
1980 		__free_iaa_wq(iaa_wq);
1981 		kfree(iaa_wq);
1982 	}
1983 
1984 	idxd_drv_disable_wq(wq);
1985 	rebalance_wq_table();
1986 
1987 	if (nr_iaa == 0) {
1988 		iaa_crypto_enabled = false;
1989 		free_wq_table();
1990 		module_put(THIS_MODULE);
1991 
1992 		pr_info("iaa_crypto now DISABLED\n");
1993 	}
1994 out:
1995 	mutex_unlock(&iaa_devices_lock);
1996 	mutex_unlock(&wq->wq_lock);
1997 }
1998 
1999 static enum idxd_dev_type dev_types[] = {
2000 	IDXD_DEV_WQ,
2001 	IDXD_DEV_NONE,
2002 };
2003 
2004 static struct idxd_device_driver iaa_crypto_driver = {
2005 	.probe = iaa_crypto_probe,
2006 	.remove = iaa_crypto_remove,
2007 	.name = IDXD_SUBDRIVER_NAME,
2008 	.type = dev_types,
2009 	.desc_complete = iaa_desc_complete,
2010 };
2011 
2012 static int __init iaa_crypto_init_module(void)
2013 {
2014 	int ret = 0;
2015 	int node;
2016 
2017 	nr_cpus = num_online_cpus();
2018 	for_each_node_with_cpus(node)
2019 		nr_nodes++;
2020 	if (!nr_nodes) {
2021 		pr_err("IAA couldn't find any nodes with cpus\n");
2022 		return -ENODEV;
2023 	}
2024 	nr_cpus_per_node = nr_cpus / nr_nodes;
2025 
2026 	if (crypto_has_comp("deflate-generic", 0, 0))
2027 		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2028 
2029 	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2030 		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2031 		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
2032 		return -ENOMEM;
2033 	}
2034 
2035 	ret = iaa_aecs_init_fixed();
2036 	if (ret < 0) {
2037 		pr_debug("IAA fixed compression mode init failed\n");
2038 		goto err_aecs_init;
2039 	}
2040 
2041 	ret = idxd_driver_register(&iaa_crypto_driver);
2042 	if (ret) {
2043 		pr_debug("IAA wq sub-driver registration failed\n");
2044 		goto err_driver_reg;
2045 	}
2046 
2047 	ret = driver_create_file(&iaa_crypto_driver.drv,
2048 				 &driver_attr_verify_compress);
2049 	if (ret) {
2050 		pr_debug("IAA verify_compress attr creation failed\n");
2051 		goto err_verify_attr_create;
2052 	}
2053 
2054 	ret = driver_create_file(&iaa_crypto_driver.drv,
2055 				 &driver_attr_sync_mode);
2056 	if (ret) {
2057 		pr_debug("IAA sync mode attr creation failed\n");
2058 		goto err_sync_attr_create;
2059 	}
2060 
2061 	if (iaa_crypto_debugfs_init())
2062 		pr_warn("debugfs init failed, stats not available\n");
2063 
2064 	pr_debug("initialized\n");
2065 out:
2066 	return ret;
2067 
2068 err_sync_attr_create:
2069 	driver_remove_file(&iaa_crypto_driver.drv,
2070 			   &driver_attr_verify_compress);
2071 err_verify_attr_create:
2072 	idxd_driver_unregister(&iaa_crypto_driver);
2073 err_driver_reg:
2074 	iaa_aecs_cleanup_fixed();
2075 err_aecs_init:
2076 	crypto_free_comp(deflate_generic_tfm);
2077 
2078 	goto out;
2079 }
2080 
2081 static void __exit iaa_crypto_cleanup_module(void)
2082 {
2083 	if (iaa_unregister_compression_device())
2084 		pr_debug("IAA compression device unregister failed\n");
2085 
2086 	iaa_crypto_debugfs_cleanup();
2087 	driver_remove_file(&iaa_crypto_driver.drv,
2088 			   &driver_attr_sync_mode);
2089 	driver_remove_file(&iaa_crypto_driver.drv,
2090 			   &driver_attr_verify_compress);
2091 	idxd_driver_unregister(&iaa_crypto_driver);
2092 	iaa_aecs_cleanup_fixed();
2093 	crypto_free_comp(deflate_generic_tfm);
2094 
2095 	pr_debug("cleaned up\n");
2096 }
2097 
2098 MODULE_IMPORT_NS(IDXD);
2099 MODULE_LICENSE("GPL");
2100 MODULE_ALIAS_IDXD_DEVICE(0);
2101 MODULE_AUTHOR("Intel Corporation");
2102 MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2103 
2104 module_init(iaa_crypto_init_module);
2105 module_exit(iaa_crypto_cleanup_module);
2106