xref: /linux/drivers/crypto/intel/iaa/iaa_crypto_main.c (revision 508ecc78b6c983a7921bee2f4bd22682f9f0396e)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3 
4 #include <linux/init.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/device.h>
9 #include <linux/iommu.h>
10 #include <uapi/linux/idxd.h>
11 #include <linux/highmem.h>
12 #include <linux/sched/smt.h>
13 #include <crypto/internal/acompress.h>
14 
15 #include "idxd.h"
16 #include "iaa_crypto.h"
17 #include "iaa_crypto_stats.h"
18 
19 #ifdef pr_fmt
20 #undef pr_fmt
21 #endif
22 
23 #define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24 
25 #define IAA_ALG_PRIORITY               300
26 
27 /* number of iaa instances probed */
28 static unsigned int nr_iaa;
29 static unsigned int nr_cpus;
30 static unsigned int nr_nodes;
31 static unsigned int nr_cpus_per_node;
32 
33 /* Number of physical cpus sharing each iaa instance */
34 static unsigned int cpus_per_iaa;
35 
36 static struct crypto_comp *deflate_generic_tfm;
37 
38 /* Per-cpu lookup table for balanced wqs */
39 static struct wq_table_entry __percpu *wq_table;
40 
41 static struct idxd_wq *wq_table_next_wq(int cpu)
42 {
43 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44 
45 	if (++entry->cur_wq >= entry->n_wqs)
46 		entry->cur_wq = 0;
47 
48 	if (!entry->wqs[entry->cur_wq])
49 		return NULL;
50 
51 	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 		 entry->wqs[entry->cur_wq]->id, cpu);
54 
55 	return entry->wqs[entry->cur_wq];
56 }
57 
58 static void wq_table_add(int cpu, struct idxd_wq *wq)
59 {
60 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61 
62 	if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 		return;
64 
65 	entry->wqs[entry->n_wqs++] = wq;
66 
67 	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 		 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70 }
71 
72 static void wq_table_free_entry(int cpu)
73 {
74 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75 
76 	kfree(entry->wqs);
77 	memset(entry, 0, sizeof(*entry));
78 }
79 
80 static void wq_table_clear_entry(int cpu)
81 {
82 	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83 
84 	entry->n_wqs = 0;
85 	entry->cur_wq = 0;
86 	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87 }
88 
89 LIST_HEAD(iaa_devices);
90 DEFINE_MUTEX(iaa_devices_lock);
91 
92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93 static bool iaa_crypto_enabled;
94 static bool iaa_crypto_registered;
95 
96 /* Verify results of IAA compress or not */
97 static bool iaa_verify_compress = true;
98 
99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100 {
101 	return sprintf(buf, "%d\n", iaa_verify_compress);
102 }
103 
104 static ssize_t verify_compress_store(struct device_driver *driver,
105 				     const char *buf, size_t count)
106 {
107 	int ret = -EBUSY;
108 
109 	mutex_lock(&iaa_devices_lock);
110 
111 	if (iaa_crypto_enabled)
112 		goto out;
113 
114 	ret = kstrtobool(buf, &iaa_verify_compress);
115 	if (ret)
116 		goto out;
117 
118 	ret = count;
119 out:
120 	mutex_unlock(&iaa_devices_lock);
121 
122 	return ret;
123 }
124 static DRIVER_ATTR_RW(verify_compress);
125 
126 /*
127  * The iaa crypto driver supports three 'sync' methods determining how
128  * compressions and decompressions are performed:
129  *
130  * - sync:      the compression or decompression completes before
131  *              returning.  This is the mode used by the async crypto
132  *              interface when the sync mode is set to 'sync' and by
133  *              the sync crypto interface regardless of setting.
134  *
135  * - async:     the compression or decompression is submitted and returns
136  *              immediately.  Completion interrupts are not used so
137  *              the caller is responsible for polling the descriptor
138  *              for completion.  This mode is applicable to only the
139  *              async crypto interface and is ignored for anything
140  *              else.
141  *
142  * - async_irq: the compression or decompression is submitted and
143  *              returns immediately.  Completion interrupts are
144  *              enabled so the caller can wait for the completion and
145  *              yield to other threads.  When the compression or
146  *              decompression completes, the completion is signaled
147  *              and the caller awakened.  This mode is applicable to
148  *              only the async crypto interface and is ignored for
149  *              anything else.
150  *
151  * These modes can be set using the iaa_crypto sync_mode driver
152  * attribute.
153  */
154 
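/*
 * Illustrative sketch (not part of the driver): one way a kernel
 * caller could exercise the modes described above through the
 * standard acomp API.  The scatterlists and lengths (src_sgl,
 * dst_sgl, src_len, dst_len) are assumed to be set up by the caller;
 * error handling is omitted for brevity.
 *
 *	struct crypto_acomp *acomp;
 *	struct acomp_req *req;
 *	DECLARE_CRYPTO_WAIT(wait);
 *	int err;
 *
 *	acomp = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	req = acomp_request_alloc(acomp);
 *	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 *				   crypto_req_done, &wait);
 *	acomp_request_set_params(req, src_sgl, dst_sgl, src_len, dst_len);
 *	err = crypto_wait_req(crypto_acomp_compress(req), &wait);
 *	acomp_request_free(req);
 *	crypto_free_acomp(acomp);
 *
 * With sync_mode set to 'async_irq' the wait above is satisfied by
 * the completion interrupt; with 'sync' the request completes before
 * crypto_acomp_compress() returns.
 */
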
155 /* Use async mode */
156 static bool async_mode;
157 /* Use interrupts */
158 static bool use_irq;
159 
160 /**
161  * set_iaa_sync_mode - Set IAA sync mode
162  * @name: The name of the sync mode
163  *
164  * Make the IAA sync mode named @name the current sync mode used by
165  * compression/decompression.
166  */
167 
168 static int set_iaa_sync_mode(const char *name)
169 {
170 	int ret = 0;
171 
172 	if (sysfs_streq(name, "sync")) {
173 		async_mode = false;
174 		use_irq = false;
175 	} else if (sysfs_streq(name, "async")) {
176 		async_mode = true;
177 		use_irq = false;
178 	} else if (sysfs_streq(name, "async_irq")) {
179 		async_mode = true;
180 		use_irq = true;
181 	} else {
182 		ret = -EINVAL;
183 	}
184 
185 	return ret;
186 }
187 
188 static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189 {
190 	int ret = 0;
191 
192 	if (!async_mode && !use_irq)
193 		ret = sprintf(buf, "%s\n", "sync");
194 	else if (async_mode && !use_irq)
195 		ret = sprintf(buf, "%s\n", "async");
196 	else if (async_mode && use_irq)
197 		ret = sprintf(buf, "%s\n", "async_irq");
198 
199 	return ret;
200 }
201 
202 static ssize_t sync_mode_store(struct device_driver *driver,
203 			       const char *buf, size_t count)
204 {
205 	int ret = -EBUSY;
206 
207 	mutex_lock(&iaa_devices_lock);
208 
209 	if (iaa_crypto_enabled)
210 		goto out;
211 
212 	ret = set_iaa_sync_mode(buf);
213 	if (ret == 0)
214 		ret = count;
215 out:
216 	mutex_unlock(&iaa_devices_lock);
217 
218 	return ret;
219 }
220 static DRIVER_ATTR_RW(sync_mode);
221 
222 static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223 
224 static int find_empty_iaa_compression_mode(void)
225 {
226 	int i;
227 
228 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 		if (!iaa_compression_modes[i])
230 			return i;
231 	}
232 
233 	/* no free slot; callers check for idx < 0 */
234 	return -EINVAL;
235 }
236 
237 static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238 {
239 	struct iaa_compression_mode *mode;
240 	int i;
241 
242 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 		mode = iaa_compression_modes[i];
244 		if (!mode)
245 			continue;
246 
247 		if (!strcmp(mode->name, name)) {
248 			*idx = i;
249 			return iaa_compression_modes[i];
250 		}
251 	}
252 
253 	return NULL;
254 }
255 
256 static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257 {
258 	kfree(mode->name);
259 	kfree(mode->ll_table);
260 	kfree(mode->d_table);
261 	kfree(mode->header_table);
262 
263 	kfree(mode);
264 }
265 
266 /*
267  * IAA Compression modes are defined by an ll_table, a d_table, and an
268  * optional header_table.  These tables are typically generated and
269  * captured using statistics collected from running actual
270  * compress/decompress workloads.
271  *
272  * A module or other kernel code can add and remove compression modes
273  * with a given name using the exported add_iaa_compression_mode()
274  * and remove_iaa_compression_mode() functions.
275  *
276  * When a new compression mode is added, the tables are saved in a
277  * global compression mode list.  When IAA devices are added, a
278  * per-IAA device dma mapping is created for each IAA device, for each
279  * compression mode.  These are the tables used to do the actual
280  * compression/decompression and are unmapped if/when the devices are
281  * removed.  Currently, compression modes must be added before any
282  * device is added, and removed after all devices have been removed.
283  */
284 
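/*
 * Illustrative sketch (not part of the driver): how a module might
 * register a custom mode before any IAA device is probed, and remove
 * it again after all devices are gone.  The table contents and the
 * "dynamic" name are hypothetical placeholders; the signatures match
 * add_iaa_compression_mode() and remove_iaa_compression_mode() below.
 *
 *	static const u32 my_ll_table[] = { ... };
 *	static const u32 my_d_table[] = { ... };
 *
 *	ret = add_iaa_compression_mode("dynamic",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, 0, 0, NULL, NULL);
 *	...
 *	remove_iaa_compression_mode("dynamic");
 */
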
285 /**
286  * remove_iaa_compression_mode - Remove an IAA compression mode
287  * @name: The name of the compression mode to remove
288  *
289  * Remove the IAA compression mode named @name.
290  */
291 void remove_iaa_compression_mode(const char *name)
292 {
293 	struct iaa_compression_mode *mode;
294 	int idx;
295 
296 	mutex_lock(&iaa_devices_lock);
297 
298 	if (!list_empty(&iaa_devices))
299 		goto out;
300 
301 	mode = find_iaa_compression_mode(name, &idx);
302 	if (mode) {
303 		free_iaa_compression_mode(mode);
304 		iaa_compression_modes[idx] = NULL;
305 	}
306 out:
307 	mutex_unlock(&iaa_devices_lock);
308 }
309 EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
310 
311 /**
312  * add_iaa_compression_mode - Add an IAA compression mode
313  * @name: The name the compression mode will be known as
314  * @ll_table: The ll table
315  * @ll_table_size: The ll table size in bytes
316  * @d_table: The d table
317  * @d_table_size: The d table size in bytes
318  * @header_table: Optional header table
319  * @header_table_size: Optional header table size in bytes
320  * @gen_decomp_table_flags: Optional flags used to generate the decomp table
321  * @init: Optional callback function to init the compression mode data
322  * @free: Optional callback function to free the compression mode data
323  *
324  * Add a new IAA compression mode named @name.
325  *
326  * Returns 0 if successful, errcode otherwise.
327  */
328 int add_iaa_compression_mode(const char *name,
329 			     const u32 *ll_table,
330 			     int ll_table_size,
331 			     const u32 *d_table,
332 			     int d_table_size,
333 			     const u8 *header_table,
334 			     int header_table_size,
335 			     u16 gen_decomp_table_flags,
336 			     iaa_dev_comp_init_fn_t init,
337 			     iaa_dev_comp_free_fn_t free)
338 {
339 	struct iaa_compression_mode *mode;
340 	int idx, ret = -ENOMEM;
341 
342 	mutex_lock(&iaa_devices_lock);
343 
344 	if (!list_empty(&iaa_devices)) {
345 		ret = -EBUSY;
346 		goto out;
347 	}
348 
349 	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
350 	if (!mode)
351 		goto out;
352 
353 	mode->name = kstrdup(name, GFP_KERNEL);
354 	if (!mode->name)
355 		goto free;
356 
357 	if (ll_table) {
358 		mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
359 		if (!mode->ll_table)
360 			goto free;
361 		memcpy(mode->ll_table, ll_table, ll_table_size);
362 		mode->ll_table_size = ll_table_size;
363 	}
364 
365 	if (d_table) {
366 		mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
367 		if (!mode->d_table)
368 			goto free;
369 		memcpy(mode->d_table, d_table, d_table_size);
370 		mode->d_table_size = d_table_size;
371 	}
372 
373 	if (header_table) {
374 		mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
375 		if (!mode->header_table)
376 			goto free;
377 		memcpy(mode->header_table, header_table, header_table_size);
378 		mode->header_table_size = header_table_size;
379 	}
380 
381 	mode->gen_decomp_table_flags = gen_decomp_table_flags;
382 
383 	mode->init = init;
384 	mode->free = free;
385 
386 	idx = find_empty_iaa_compression_mode();
387 	if (idx < 0)
388 		goto free;
389 
390 	pr_debug("IAA compression mode %s added at idx %d\n",
391 		 mode->name, idx);
392 
393 	iaa_compression_modes[idx] = mode;
394 
395 	ret = 0;
396 out:
397 	mutex_unlock(&iaa_devices_lock);
398 
399 	return ret;
400 free:
401 	free_iaa_compression_mode(mode);
402 	goto out;
403 }
404 EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
405 
406 static struct iaa_device_compression_mode *
407 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
408 {
409 	return iaa_device->compression_modes[idx];
410 }
411 
412 static void free_device_compression_mode(struct iaa_device *iaa_device,
413 					 struct iaa_device_compression_mode *device_mode)
414 {
415 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
416 	struct device *dev = &iaa_device->idxd->pdev->dev;
417 
418 	kfree(device_mode->name);
419 
420 	if (device_mode->aecs_comp_table)
421 		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
422 				  device_mode->aecs_comp_table_dma_addr);
423 	if (device_mode->aecs_decomp_table)
424 		dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
425 				  device_mode->aecs_decomp_table_dma_addr);
426 
427 	kfree(device_mode);
428 }
429 
430 #define IDXD_OP_FLAG_AECS_RW_TGLS       0x400000
431 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
432 #define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
433 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
434 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
435 						IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
436 						IDXD_OP_FLAG_AECS_RW_TGLS)
437 
438 static int check_completion(struct device *dev,
439 			    struct iax_completion_record *comp,
440 			    bool compress,
441 			    bool only_once);
442 
443 static int decompress_header(struct iaa_device_compression_mode *device_mode,
444 			     struct iaa_compression_mode *mode,
445 			     struct idxd_wq *wq)
446 {
447 	dma_addr_t src_addr, src2_addr;
448 	struct idxd_desc *idxd_desc;
449 	struct iax_hw_desc *desc;
450 	struct device *dev;
451 	int ret = 0;
452 
453 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
454 	if (IS_ERR(idxd_desc))
455 		return PTR_ERR(idxd_desc);
456 
457 	desc = idxd_desc->iax_hw;
458 
459 	dev = &wq->idxd->pdev->dev;
460 
461 	src_addr = dma_map_single(dev, (void *)mode->header_table,
462 				  mode->header_table_size, DMA_TO_DEVICE);
463 	dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
464 		__func__, mode->name, src_addr,	dev,
465 		mode->header_table, mode->header_table_size);
466 	if (unlikely(dma_mapping_error(dev, src_addr))) {
467 		dev_dbg(dev, "dma_map_single err, exiting\n");
468 		idxd_free_desc(wq, idxd_desc);
469 		return -ENOMEM;
470 	}
471 
472 	desc->flags = IAX_AECS_GEN_FLAG;
473 	desc->opcode = IAX_OPCODE_DECOMPRESS;
474 
475 	desc->src1_addr = (u64)src_addr;
476 	desc->src1_size = mode->header_table_size;
477 
478 	src2_addr = device_mode->aecs_decomp_table_dma_addr;
479 	desc->src2_addr = (u64)src2_addr;
480 	desc->src2_size = 1088;
481 	dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
482 		__func__, mode->name, desc->src2_addr, dev, desc->src2_size);
483 	desc->max_dst_size = 0; /* suppressed output */
484 
485 	desc->decompr_flags = mode->gen_decomp_table_flags;
486 
487 	desc->priv = 0;
488 
489 	desc->completion_addr = idxd_desc->compl_dma;
490 
491 	ret = idxd_submit_desc(wq, idxd_desc);
492 	if (ret) {
493 		pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
494 		goto out;
495 	}
496 
497 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
498 	if (ret)
499 		dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
500 			__func__, mode->name, ret);
501 	else
502 		dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
503 			mode->name);
504 out:
505 	dma_unmap_single(dev, src_addr, mode->header_table_size, DMA_TO_DEVICE);
506 
507 	return ret;
508 }
509 
510 static int init_device_compression_mode(struct iaa_device *iaa_device,
511 					struct iaa_compression_mode *mode,
512 					int idx, struct idxd_wq *wq)
513 {
514 	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
515 	struct device *dev = &iaa_device->idxd->pdev->dev;
516 	struct iaa_device_compression_mode *device_mode;
517 	int ret = -ENOMEM;
518 
519 	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
520 	if (!device_mode)
521 		return -ENOMEM;
522 
523 	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
524 	if (!device_mode->name)
525 		goto free;
526 
527 	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
528 							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
529 	if (!device_mode->aecs_comp_table)
530 		goto free;
531 
532 	device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
533 							    &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
534 	if (!device_mode->aecs_decomp_table)
535 		goto free;
536 
537 	/* Add Huffman table to aecs */
538 	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
539 	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
540 	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
541 
542 	if (mode->header_table) {
543 		ret = decompress_header(device_mode, mode, wq);
544 		if (ret) {
545 			pr_debug("iaa header decompression failed: ret=%d\n", ret);
546 			goto free;
547 		}
548 	}
549 
550 	if (mode->init) {
551 		ret = mode->init(device_mode);
552 		if (ret)
553 			goto free;
554 	}
555 
556 	/* mode index should match iaa_compression_modes idx */
557 	iaa_device->compression_modes[idx] = device_mode;
558 
559 	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
560 		 mode->name, iaa_device->idxd->id);
561 
562 	ret = 0;
563 out:
564 	return ret;
565 free:
566 	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
567 		 mode->name, iaa_device->idxd->id);
568 
569 	free_device_compression_mode(iaa_device, device_mode);
570 	goto out;
571 }
572 
573 static int init_device_compression_modes(struct iaa_device *iaa_device,
574 					 struct idxd_wq *wq)
575 {
576 	struct iaa_compression_mode *mode;
577 	int i, ret = 0;
578 
579 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
580 		mode = iaa_compression_modes[i];
581 		if (!mode)
582 			continue;
583 
584 		ret = init_device_compression_mode(iaa_device, mode, i, wq);
585 		if (ret)
586 			break;
587 	}
588 
589 	return ret;
590 }
591 
592 static void remove_device_compression_modes(struct iaa_device *iaa_device)
593 {
594 	struct iaa_device_compression_mode *device_mode;
595 	int i;
596 
597 	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
598 		device_mode = iaa_device->compression_modes[i];
599 		if (!device_mode)
600 			continue;
601 
602 		if (iaa_compression_modes[i]->free)
603 			iaa_compression_modes[i]->free(device_mode);
604 		free_device_compression_mode(iaa_device, device_mode);
605 		iaa_device->compression_modes[i] = NULL;
606 	}
607 }
608 
609 static struct iaa_device *iaa_device_alloc(void)
610 {
611 	struct iaa_device *iaa_device;
612 
613 	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
614 	if (!iaa_device)
615 		return NULL;
616 
617 	INIT_LIST_HEAD(&iaa_device->wqs);
618 
619 	return iaa_device;
620 }
621 
622 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
623 {
624 	struct iaa_wq *iaa_wq;
625 
626 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
627 		if (iaa_wq->wq == wq)
628 			return true;
629 	}
630 
631 	return false;
632 }
633 
634 static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
635 {
636 	struct iaa_device *iaa_device;
637 
638 	iaa_device = iaa_device_alloc();
639 	if (!iaa_device)
640 		return NULL;
641 
642 	iaa_device->idxd = idxd;
643 
644 	list_add_tail(&iaa_device->list, &iaa_devices);
645 
646 	nr_iaa++;
647 
648 	return iaa_device;
649 }
650 
651 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
652 {
653 	int ret = 0;
654 
655 	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
656 	if (ret)
657 		return ret;
658 
659 	return ret;
660 }
661 
662 static void del_iaa_device(struct iaa_device *iaa_device)
663 {
664 	list_del(&iaa_device->list);
665 
666 	nr_iaa--;
667 }
668 
669 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
670 		      struct iaa_wq **new_wq)
671 {
672 	struct idxd_device *idxd = iaa_device->idxd;
673 	struct pci_dev *pdev = idxd->pdev;
674 	struct device *dev = &pdev->dev;
675 	struct iaa_wq *iaa_wq;
676 
677 	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
678 	if (!iaa_wq)
679 		return -ENOMEM;
680 
681 	iaa_wq->wq = wq;
682 	iaa_wq->iaa_device = iaa_device;
683 	idxd_wq_set_private(wq, iaa_wq);
684 
685 	list_add_tail(&iaa_wq->list, &iaa_device->wqs);
686 
687 	iaa_device->n_wq++;
688 
689 	if (new_wq)
690 		*new_wq = iaa_wq;
691 
692 	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
693 		wq->id, iaa_device->idxd->id, iaa_device->n_wq);
694 
695 	return 0;
696 }
697 
698 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
699 {
700 	struct idxd_device *idxd = iaa_device->idxd;
701 	struct pci_dev *pdev = idxd->pdev;
702 	struct device *dev = &pdev->dev;
703 	struct iaa_wq *iaa_wq;
704 
705 	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
706 		if (iaa_wq->wq == wq) {
707 			list_del(&iaa_wq->list);
708 			iaa_device->n_wq--;
709 
710 			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
711 				wq->id, iaa_device->idxd->id,
712 				iaa_device->n_wq, nr_iaa);
713 
714 			if (iaa_device->n_wq == 0)
715 				del_iaa_device(iaa_device);
716 			break;
717 		}
718 	}
719 }
720 
721 static void clear_wq_table(void)
722 {
723 	int cpu;
724 
725 	for (cpu = 0; cpu < nr_cpus; cpu++)
726 		wq_table_clear_entry(cpu);
727 
728 	pr_debug("cleared wq table\n");
729 }
730 
731 static void free_iaa_device(struct iaa_device *iaa_device)
732 {
733 	if (!iaa_device)
734 		return;
735 
736 	remove_device_compression_modes(iaa_device);
737 	kfree(iaa_device);
738 }
739 
740 static void __free_iaa_wq(struct iaa_wq *iaa_wq)
741 {
742 	struct iaa_device *iaa_device;
743 
744 	if (!iaa_wq)
745 		return;
746 
747 	iaa_device = iaa_wq->iaa_device;
748 	if (iaa_device->n_wq == 0)
749 		free_iaa_device(iaa_wq->iaa_device);
750 }
751 
752 static void free_iaa_wq(struct iaa_wq *iaa_wq)
753 {
754 	struct idxd_wq *wq;
755 
756 	__free_iaa_wq(iaa_wq);
757 
758 	wq = iaa_wq->wq;
759 
760 	kfree(iaa_wq);
761 	idxd_wq_set_private(wq, NULL);
762 }
763 
764 static int iaa_wq_get(struct idxd_wq *wq)
765 {
766 	struct idxd_device *idxd = wq->idxd;
767 	struct iaa_wq *iaa_wq;
768 	int ret = 0;
769 
770 	spin_lock(&idxd->dev_lock);
771 	iaa_wq = idxd_wq_get_private(wq);
772 	if (iaa_wq && !iaa_wq->remove) {
773 		iaa_wq->ref++;
774 		idxd_wq_get(wq);
775 	} else {
776 		ret = -ENODEV;
777 	}
778 	spin_unlock(&idxd->dev_lock);
779 
780 	return ret;
781 }
782 
783 static int iaa_wq_put(struct idxd_wq *wq)
784 {
785 	struct idxd_device *idxd = wq->idxd;
786 	struct iaa_wq *iaa_wq;
787 	bool free = false;
788 	int ret = 0;
789 
790 	spin_lock(&idxd->dev_lock);
791 	iaa_wq = idxd_wq_get_private(wq);
792 	if (iaa_wq) {
793 		iaa_wq->ref--;
794 		if (iaa_wq->ref == 0 && iaa_wq->remove) {
795 			idxd_wq_set_private(wq, NULL);
796 			free = true;
797 		}
798 		idxd_wq_put(wq);
799 	} else {
800 		ret = -ENODEV;
801 	}
802 	spin_unlock(&idxd->dev_lock);
803 	if (free) {
804 		__free_iaa_wq(iaa_wq);
805 		kfree(iaa_wq);
806 	}
807 
808 	return ret;
809 }
810 
811 static void free_wq_table(void)
812 {
813 	int cpu;
814 
815 	for (cpu = 0; cpu < nr_cpus; cpu++)
816 		wq_table_free_entry(cpu);
817 
818 	free_percpu(wq_table);
819 
820 	pr_debug("freed wq table\n");
821 }
822 
823 static int alloc_wq_table(int max_wqs)
824 {
825 	struct wq_table_entry *entry;
826 	int cpu;
827 
828 	wq_table = alloc_percpu(struct wq_table_entry);
829 	if (!wq_table)
830 		return -ENOMEM;
831 
832 	for (cpu = 0; cpu < nr_cpus; cpu++) {
833 		entry = per_cpu_ptr(wq_table, cpu);
834 		entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
835 		if (!entry->wqs) {
836 			free_wq_table();
837 			return -ENOMEM;
838 		}
839 
840 		entry->max_wqs = max_wqs;
841 	}
842 
843 	pr_debug("initialized wq table\n");
844 
845 	return 0;
846 }
847 
848 static int save_iaa_wq(struct idxd_wq *wq)
849 {
850 	struct iaa_device *iaa_device, *found = NULL;
851 	struct idxd_device *idxd;
852 	struct pci_dev *pdev;
853 	struct device *dev;
854 	int ret = 0;
855 
856 	list_for_each_entry(iaa_device, &iaa_devices, list) {
857 		if (iaa_device->idxd == wq->idxd) {
858 			idxd = iaa_device->idxd;
859 			pdev = idxd->pdev;
860 			dev = &pdev->dev;
861 			/*
862 			 * Check to see that we don't already have this wq.
863 			 * Shouldn't happen but we don't control probing.
864 			 */
865 			if (iaa_has_wq(iaa_device, wq)) {
866 				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
867 					iaa_device);
868 				goto out;
869 			}
870 
871 			found = iaa_device;
872 
873 			ret = add_iaa_wq(iaa_device, wq, NULL);
874 			if (ret)
875 				goto out;
876 
877 			break;
878 		}
879 	}
880 
881 	if (!found) {
882 		struct iaa_device *new_device;
883 		struct iaa_wq *new_wq;
884 
885 		new_device = add_iaa_device(wq->idxd);
886 		if (!new_device) {
887 			ret = -ENOMEM;
888 			goto out;
889 		}
890 
891 		ret = add_iaa_wq(new_device, wq, &new_wq);
892 		if (ret) {
893 			del_iaa_device(new_device);
894 			free_iaa_device(new_device);
895 			goto out;
896 		}
897 
898 		ret = init_iaa_device(new_device, new_wq);
899 		if (ret) {
900 			del_iaa_wq(new_device, new_wq->wq);
901 			del_iaa_device(new_device);
902 			free_iaa_wq(new_wq);
903 			goto out;
904 		}
905 	}
906 
907 	if (WARN_ON(nr_iaa == 0))
908 		return -EINVAL;
909 
910 	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
911 out:
912 	return ret;
913 }
914 
915 static void remove_iaa_wq(struct idxd_wq *wq)
916 {
917 	struct iaa_device *iaa_device;
918 
919 	list_for_each_entry(iaa_device, &iaa_devices, list) {
920 		if (iaa_has_wq(iaa_device, wq)) {
921 			del_iaa_wq(iaa_device, wq);
922 			break;
923 		}
924 	}
925 
926 	if (nr_iaa)
927 		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
928 	else
929 		cpus_per_iaa = 0;
930 }
931 
932 static int wq_table_add_wqs(int iaa, int cpu)
933 {
934 	struct iaa_device *iaa_device, *found_device = NULL;
935 	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
936 	struct idxd_device *idxd;
937 	struct iaa_wq *iaa_wq;
938 	struct pci_dev *pdev;
939 	struct device *dev;
940 
941 	list_for_each_entry(iaa_device, &iaa_devices, list) {
942 		idxd = iaa_device->idxd;
943 		pdev = idxd->pdev;
944 		dev = &pdev->dev;
945 
946 		if (cur_iaa != iaa) {
947 			cur_iaa++;
948 			continue;
949 		}
950 
951 		found_device = iaa_device;
952 		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
953 			found_device->idxd->id, cur_iaa);
954 		break;
955 	}
956 
957 	if (!found_device) {
958 		found_device = list_first_entry_or_null(&iaa_devices,
959 							struct iaa_device, list);
960 		if (!found_device) {
961 			pr_debug("couldn't find any iaa devices with wqs!\n");
962 			ret = -EINVAL;
963 			goto out;
964 		}
965 		cur_iaa = 0;
966 
967 		idxd = found_device->idxd;
968 		pdev = idxd->pdev;
969 		dev = &pdev->dev;
970 		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
971 			found_device->idxd->id, cur_iaa);
972 	}
973 
974 	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
975 		wq_table_add(cpu, iaa_wq->wq);
976 		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
977 			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
978 		n_wqs_added++;
979 	}
980 
981 	if (!n_wqs_added) {
982 		pr_debug("couldn't find any iaa wqs!\n");
983 		ret = -EINVAL;
984 		goto out;
985 	}
986 out:
987 	return ret;
988 }
989 
990 /*
991  * Rebalance the wq table so that given a cpu, it's easy to find the
992  * closest IAA instance.  The idea is to try to choose the most
993  * appropriate IAA instance for a caller and spread available
994  * workqueues around to clients.
995  */
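/*
 * Worked example (hypothetical topology): with nr_nodes = 2,
 * nr_cpus_per_node = 64 and nr_iaa = 4, cpus_per_iaa = (2 * 64) / 4 = 32,
 * so the first 32 CPUs of node 0 get the wqs of IAA instance 0, the
 * next 32 get those of instance 1, the first 32 CPUs of node 1 get
 * those of instance 2, and so on.
 */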
996 static void rebalance_wq_table(void)
997 {
998 	const struct cpumask *node_cpus;
999 	int node, cpu, iaa = -1;
1000 
1001 	if (nr_iaa == 0)
1002 		return;
1003 
1004 	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
1005 		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
1006 
1007 	clear_wq_table();
1008 
1009 	if (nr_iaa == 1) {
1010 		for (cpu = 0; cpu < nr_cpus; cpu++) {
1011 			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
1012 				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
1013 				return;
1014 			}
1015 		}
1016 
1017 		return;
1018 	}
1019 
1020 	for_each_node_with_cpus(node) {
1021 		node_cpus = cpumask_of_node(node);
1022 
1023 		for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
1024 			int node_cpu = cpumask_nth(cpu, node_cpus);
1025 
1026 			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
1027 				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
1028 				return;
1029 			}
1030 
1031 			if ((cpu % cpus_per_iaa) == 0)
1032 				iaa++;
1033 
1034 			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
1035 				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
1036 				return;
1037 			}
1038 		}
1039 	}
1040 }
1041 
1042 static inline int check_completion(struct device *dev,
1043 				   struct iax_completion_record *comp,
1044 				   bool compress,
1045 				   bool only_once)
1046 {
1047 	char *op_str = compress ? "compress" : "decompress";
1048 	int ret = 0;
1049 
1050 	while (!comp->status) {
1051 		if (only_once)
1052 			return -EAGAIN;
1053 		cpu_relax();
1054 	}
1055 
1056 	if (comp->status != IAX_COMP_SUCCESS) {
1057 		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
1058 			ret = -ETIMEDOUT;
1059 			dev_dbg(dev, "%s timed out, size=0x%x\n",
1060 				op_str, comp->output_size);
1061 			update_completion_timeout_errs();
1062 			goto out;
1063 		}
1064 
1065 		if (comp->status == IAA_ANALYTICS_ERROR &&
1066 		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
1067 			ret = -E2BIG;
1068 			dev_dbg(dev, "compressed > uncompressed size,"
1069 				" not compressing, size=0x%x\n",
1070 				comp->output_size);
1071 			update_completion_comp_buf_overflow_errs();
1072 			goto out;
1073 		}
1074 
1075 		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
1076 			ret = -EOVERFLOW;
1077 			goto out;
1078 		}
1079 
1080 		ret = -EINVAL;
1081 		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
1082 			op_str, comp->status, comp->error_code, comp->output_size);
1083 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
1084 		update_completion_einval_errs();
1085 
1086 		goto out;
1087 	}
1088 out:
1089 	return ret;
1090 }
1091 
1092 static int deflate_generic_decompress(struct acomp_req *req)
1093 {
1094 	void *src, *dst;
1095 	int ret;
1096 
1097 	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1098 	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1099 
1100 	ret = crypto_comp_decompress(deflate_generic_tfm,
1101 				     src, req->slen, dst, &req->dlen);
1102 
1103 	kunmap_local(dst);
1104 	kunmap_local(src);
1105 
1106 	update_total_sw_decomp_calls();
1107 
1108 	return ret;
1109 }
1110 
1111 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1112 				struct acomp_req *req,
1113 				dma_addr_t *src_addr, dma_addr_t *dst_addr);
1114 
1115 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1116 			       struct idxd_wq *wq,
1117 			       dma_addr_t src_addr, unsigned int slen,
1118 			       dma_addr_t dst_addr, unsigned int *dlen,
1119 			       u32 compression_crc);
1120 
1121 static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1122 			      enum idxd_complete_type comp_type,
1123 			      bool free_desc, void *__ctx,
1124 			      u32 *status)
1125 {
1126 	struct iaa_device_compression_mode *active_compression_mode;
1127 	struct iaa_compression_ctx *compression_ctx;
1128 	struct crypto_ctx *ctx = __ctx;
1129 	struct iaa_device *iaa_device;
1130 	struct idxd_device *idxd;
1131 	struct iaa_wq *iaa_wq;
1132 	struct pci_dev *pdev;
1133 	struct device *dev;
1134 	int ret, err = 0;
1135 
1136 	compression_ctx = crypto_tfm_ctx(ctx->tfm);
1137 
1138 	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1139 	iaa_device = iaa_wq->iaa_device;
1140 	idxd = iaa_device->idxd;
1141 	pdev = idxd->pdev;
1142 	dev = &pdev->dev;
1143 
1144 	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1145 								  compression_ctx->mode);
1146 	dev_dbg(dev, "%s: compression mode %s,"
1147 		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1148 		active_compression_mode->name,
1149 		ctx->src_addr, ctx->dst_addr);
1150 
1151 	ret = check_completion(dev, idxd_desc->iax_completion,
1152 			       ctx->compress, false);
1153 	if (ret) {
1154 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1155 		if (!ctx->compress &&
1156 		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1157 			pr_warn("%s: falling back to deflate-generic decompress, "
1158 				"analytics error code %x\n", __func__,
1159 				idxd_desc->iax_completion->error_code);
1160 			ret = deflate_generic_decompress(ctx->req);
1161 			if (ret) {
1162 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1163 					__func__, ret);
1164 				err = -EIO;
1165 				goto err;
1166 			}
1167 		} else {
1168 			err = -EIO;
1169 			goto err;
1170 		}
1171 	} else {
1172 		ctx->req->dlen = idxd_desc->iax_completion->output_size;
1173 	}
1174 
1175 	/* Update stats */
1176 	if (ctx->compress) {
1177 		update_total_comp_bytes_out(ctx->req->dlen);
1178 		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1179 	} else {
1180 		update_total_decomp_bytes_in(ctx->req->dlen);
1181 		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
1182 	}
1183 
1184 	if (ctx->compress && compression_ctx->verify_compress) {
1185 		dma_addr_t src_addr, dst_addr;
1186 		u32 compression_crc;
1187 
1188 		compression_crc = idxd_desc->iax_completion->crc;
1189 
1190 		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1191 		if (ret) {
1192 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1193 			err = -EIO;
1194 			goto out;
1195 		}
1196 
1197 		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1198 					  ctx->req->slen, dst_addr, &ctx->req->dlen,
1199 					  compression_crc);
1200 		if (ret) {
1201 			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1202 			err = -EIO;
1203 		}
1204 
1205 		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1206 		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1207 
1208 		goto out;
1209 	}
1210 err:
1211 	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1212 	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1213 out:
1214 	if (ret != 0)
1215 		dev_dbg(dev, "asynchronous compress/decompress failed ret=%d\n", ret);
1216 
1217 	if (ctx->req->base.complete)
1218 		acomp_request_complete(ctx->req, err);
1219 
1220 	if (free_desc)
1221 		idxd_free_desc(idxd_desc->wq, idxd_desc);
1222 	iaa_wq_put(idxd_desc->wq);
1223 }
1224 
1225 static int iaa_compress(struct crypto_tfm *tfm,	struct acomp_req *req,
1226 			struct idxd_wq *wq,
1227 			dma_addr_t src_addr, unsigned int slen,
1228 			dma_addr_t dst_addr, unsigned int *dlen,
1229 			u32 *compression_crc,
1230 			bool disable_async)
1231 {
1232 	struct iaa_device_compression_mode *active_compression_mode;
1233 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1234 	struct iaa_device *iaa_device;
1235 	struct idxd_desc *idxd_desc;
1236 	struct iax_hw_desc *desc;
1237 	struct idxd_device *idxd;
1238 	struct iaa_wq *iaa_wq;
1239 	struct pci_dev *pdev;
1240 	struct device *dev;
1241 	int ret = 0;
1242 
1243 	iaa_wq = idxd_wq_get_private(wq);
1244 	iaa_device = iaa_wq->iaa_device;
1245 	idxd = iaa_device->idxd;
1246 	pdev = idxd->pdev;
1247 	dev = &pdev->dev;
1248 
1249 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1250 
1251 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1252 	if (IS_ERR(idxd_desc)) {
1253 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1254 		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1255 		return PTR_ERR(idxd_desc);
1256 	}
1257 	desc = idxd_desc->iax_hw;
1258 
1259 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1260 		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1261 	desc->opcode = IAX_OPCODE_COMPRESS;
1262 	desc->compr_flags = IAA_COMP_FLAGS;
1263 	desc->priv = 0;
1264 
1265 	desc->src1_addr = (u64)src_addr;
1266 	desc->src1_size = slen;
1267 	desc->dst_addr = (u64)dst_addr;
1268 	desc->max_dst_size = *dlen;
1269 	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1270 	desc->src2_size = sizeof(struct aecs_comp_table_record);
1271 	desc->completion_addr = idxd_desc->compl_dma;
1272 
1273 	if (ctx->use_irq && !disable_async) {
1274 		desc->flags |= IDXD_OP_FLAG_RCI;
1275 
1276 		idxd_desc->crypto.req = req;
1277 		idxd_desc->crypto.tfm = tfm;
1278 		idxd_desc->crypto.src_addr = src_addr;
1279 		idxd_desc->crypto.dst_addr = dst_addr;
1280 		idxd_desc->crypto.compress = true;
1281 
1282 		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1283 			" src_addr %llx, dst_addr %llx\n", __func__,
1284 			active_compression_mode->name,
1285 			src_addr, dst_addr);
1286 	} else if (ctx->async_mode && !disable_async)
1287 		req->base.data = idxd_desc;
1288 
1289 	dev_dbg(dev, "%s: compression mode %s,"
1290 		" desc->src1_addr %llx, desc->src1_size %d,"
1291 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1292 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1293 		active_compression_mode->name,
1294 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1295 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1296 
1297 	ret = idxd_submit_desc(wq, idxd_desc);
1298 	if (ret) {
1299 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1300 		goto err;
1301 	}
1302 
1303 	/* Update stats */
1304 	update_total_comp_calls();
1305 	update_wq_comp_calls(wq);
1306 
1307 	if (ctx->async_mode && !disable_async) {
1308 		ret = -EINPROGRESS;
1309 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1310 		goto out;
1311 	}
1312 
1313 	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1314 	if (ret) {
1315 		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1316 		goto err;
1317 	}
1318 
1319 	*dlen = idxd_desc->iax_completion->output_size;
1320 
1321 	/* Update stats */
1322 	update_total_comp_bytes_out(*dlen);
1323 	update_wq_comp_bytes(wq, *dlen);
1324 
1325 	*compression_crc = idxd_desc->iax_completion->crc;
1326 
1327 	if (!ctx->async_mode)
1328 		idxd_free_desc(wq, idxd_desc);
1329 out:
1330 	return ret;
1331 err:
1332 	idxd_free_desc(wq, idxd_desc);
1333 	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1334 
1335 	goto out;
1336 }
1337 
1338 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1339 				struct acomp_req *req,
1340 				dma_addr_t *src_addr, dma_addr_t *dst_addr)
1341 {
1342 	int ret = 0;
1343 	int nr_sgs;
1344 
1345 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1346 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1347 
1348 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1349 	if (nr_sgs <= 0 || nr_sgs > 1) {
1350 		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1351 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1352 			iaa_wq->wq->id, ret);
1353 		ret = -EIO;
1354 		goto out;
1355 	}
1356 	*src_addr = sg_dma_address(req->src);
1357 	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1358 		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1359 		req->src, req->slen, sg_dma_len(req->src));
1360 
1361 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1362 	if (nr_sgs <= 0 || nr_sgs > 1) {
1363 		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1364 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1365 			iaa_wq->wq->id, ret);
1366 		ret = -EIO;
1367 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1368 		goto out;
1369 	}
1370 	*dst_addr = sg_dma_address(req->dst);
1371 	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1372 		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1373 		req->dst, req->dlen, sg_dma_len(req->dst));
1374 out:
1375 	return ret;
1376 }
1377 
1378 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1379 			       struct idxd_wq *wq,
1380 			       dma_addr_t src_addr, unsigned int slen,
1381 			       dma_addr_t dst_addr, unsigned int *dlen,
1382 			       u32 compression_crc)
1383 {
1384 	struct iaa_device_compression_mode *active_compression_mode;
1385 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1386 	struct iaa_device *iaa_device;
1387 	struct idxd_desc *idxd_desc;
1388 	struct iax_hw_desc *desc;
1389 	struct idxd_device *idxd;
1390 	struct iaa_wq *iaa_wq;
1391 	struct pci_dev *pdev;
1392 	struct device *dev;
1393 	int ret = 0;
1394 
1395 	iaa_wq = idxd_wq_get_private(wq);
1396 	iaa_device = iaa_wq->iaa_device;
1397 	idxd = iaa_device->idxd;
1398 	pdev = idxd->pdev;
1399 	dev = &pdev->dev;
1400 
1401 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1402 
1403 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1404 	if (IS_ERR(idxd_desc)) {
1405 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1406 		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1407 			PTR_ERR(idxd_desc));
1408 		return PTR_ERR(idxd_desc);
1409 	}
1410 	desc = idxd_desc->iax_hw;
1411 
1412 	/* Verify (optional) - decompress and check crc, suppress dest write */
1413 
1414 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1415 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1416 	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1417 	desc->priv = 0;
1418 
1419 	desc->src1_addr = (u64)dst_addr;
1420 	desc->src1_size = *dlen;
1421 	desc->dst_addr = (u64)src_addr;
1422 	desc->max_dst_size = slen;
1423 	desc->completion_addr = idxd_desc->compl_dma;
1424 
1425 	dev_dbg(dev, "(verify) compression mode %s,"
1426 		" desc->src1_addr %llx, desc->src1_size %d,"
1427 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1428 		" desc->src2_addr %llx, desc->src2_size %d\n",
1429 		active_compression_mode->name,
1430 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1431 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1432 
1433 	ret = idxd_submit_desc(wq, idxd_desc);
1434 	if (ret) {
1435 		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1436 		goto err;
1437 	}
1438 
1439 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1440 	if (ret) {
1441 		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1442 		goto err;
1443 	}
1444 
1445 	if (compression_crc != idxd_desc->iax_completion->crc) {
1446 		ret = -EINVAL;
1447 		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1448 			" comp=0x%x, decomp=0x%x\n", compression_crc,
1449 			idxd_desc->iax_completion->crc);
1450 		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1451 			       8, 1, idxd_desc->iax_completion, 64, 0);
1452 		goto err;
1453 	}
1454 
1455 	idxd_free_desc(wq, idxd_desc);
1456 out:
1457 	return ret;
1458 err:
1459 	idxd_free_desc(wq, idxd_desc);
1460 	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1461 
1462 	goto out;
1463 }
1464 
1465 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1466 			  struct idxd_wq *wq,
1467 			  dma_addr_t src_addr, unsigned int slen,
1468 			  dma_addr_t dst_addr, unsigned int *dlen,
1469 			  bool disable_async)
1470 {
1471 	struct iaa_device_compression_mode *active_compression_mode;
1472 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1473 	struct iaa_device *iaa_device;
1474 	struct idxd_desc *idxd_desc;
1475 	struct iax_hw_desc *desc;
1476 	struct idxd_device *idxd;
1477 	struct iaa_wq *iaa_wq;
1478 	struct pci_dev *pdev;
1479 	struct device *dev;
1480 	int ret = 0;
1481 
1482 	iaa_wq = idxd_wq_get_private(wq);
1483 	iaa_device = iaa_wq->iaa_device;
1484 	idxd = iaa_device->idxd;
1485 	pdev = idxd->pdev;
1486 	dev = &pdev->dev;
1487 
1488 	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1489 
1490 	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1491 	if (IS_ERR(idxd_desc)) {
1492 		dev_dbg(dev, "idxd descriptor allocation failed\n");
1493 		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1494 			PTR_ERR(idxd_desc));
1495 		return PTR_ERR(idxd_desc);
1496 	}
1497 	desc = idxd_desc->iax_hw;
1498 
1499 	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1500 	desc->opcode = IAX_OPCODE_DECOMPRESS;
1501 	desc->max_dst_size = PAGE_SIZE;
1502 	desc->decompr_flags = IAA_DECOMP_FLAGS;
1503 	desc->priv = 0;
1504 
1505 	desc->src1_addr = (u64)src_addr;
1506 	desc->dst_addr = (u64)dst_addr;
1507 	desc->max_dst_size = *dlen;
1508 	desc->src1_size = slen;
1509 	desc->completion_addr = idxd_desc->compl_dma;
1510 
1511 	if (ctx->use_irq && !disable_async) {
1512 		desc->flags |= IDXD_OP_FLAG_RCI;
1513 
1514 		idxd_desc->crypto.req = req;
1515 		idxd_desc->crypto.tfm = tfm;
1516 		idxd_desc->crypto.src_addr = src_addr;
1517 		idxd_desc->crypto.dst_addr = dst_addr;
1518 		idxd_desc->crypto.compress = false;
1519 
1520 		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1521 			" src_addr %llx, dst_addr %llx\n", __func__,
1522 			active_compression_mode->name,
1523 			src_addr, dst_addr);
1524 	} else if (ctx->async_mode && !disable_async)
1525 		req->base.data = idxd_desc;
1526 
1527 	dev_dbg(dev, "%s: decompression mode %s,"
1528 		" desc->src1_addr %llx, desc->src1_size %d,"
1529 		" desc->dst_addr %llx, desc->max_dst_size %d,"
1530 		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1531 		active_compression_mode->name,
1532 		desc->src1_addr, desc->src1_size, desc->dst_addr,
1533 		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1534 
1535 	ret = idxd_submit_desc(wq, idxd_desc);
1536 	if (ret) {
1537 		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1538 		goto err;
1539 	}
1540 
1541 	/* Update stats */
1542 	update_total_decomp_calls();
1543 	update_wq_decomp_calls(wq);
1544 
1545 	if (ctx->async_mode && !disable_async) {
1546 		ret = -EINPROGRESS;
1547 		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1548 		goto out;
1549 	}
1550 
1551 	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1552 	if (ret) {
1553 		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1554 		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1555 			pr_warn("%s: falling back to deflate-generic decompress, "
1556 				"analytics error code %x\n", __func__,
1557 				idxd_desc->iax_completion->error_code);
1558 			ret = deflate_generic_decompress(req);
1559 			if (ret) {
1560 				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1561 					__func__, ret);
1562 				goto err;
1563 			}
1564 		} else {
1565 			goto err;
1566 		}
1567 	} else {
1568 		req->dlen = idxd_desc->iax_completion->output_size;
1569 	}
1570 
1571 	*dlen = req->dlen;
1572 
1573 	if (!ctx->async_mode)
1574 		idxd_free_desc(wq, idxd_desc);
1575 
1576 	/* Update stats */
1577 	update_total_decomp_bytes_in(slen);
1578 	update_wq_decomp_bytes(wq, slen);
1579 out:
1580 	return ret;
1581 err:
1582 	idxd_free_desc(wq, idxd_desc);
1583 	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1584 
1585 	goto out;
1586 }
1587 
1588 static int iaa_comp_acompress(struct acomp_req *req)
1589 {
1590 	struct iaa_compression_ctx *compression_ctx;
1591 	struct crypto_tfm *tfm = req->base.tfm;
1592 	dma_addr_t src_addr, dst_addr;
1593 	bool disable_async = false;
1594 	int nr_sgs, cpu, ret = 0;
1595 	struct iaa_wq *iaa_wq;
1596 	u32 compression_crc;
1597 	struct idxd_wq *wq;
1598 	struct device *dev;
1599 	int order = -1;
1600 
1601 	compression_ctx = crypto_tfm_ctx(tfm);
1602 
1603 	if (!iaa_crypto_enabled) {
1604 		pr_debug("iaa_crypto disabled, not compressing\n");
1605 		return -ENODEV;
1606 	}
1607 
1608 	if (!req->src || !req->slen) {
1609 		pr_debug("invalid src, not compressing\n");
1610 		return -EINVAL;
1611 	}
1612 
1613 	cpu = get_cpu();
1614 	wq = wq_table_next_wq(cpu);
1615 	put_cpu();
1616 	if (!wq) {
1617 		pr_debug("no wq configured for cpu=%d\n", cpu);
1618 		return -ENODEV;
1619 	}
1620 
1621 	ret = iaa_wq_get(wq);
1622 	if (ret) {
1623 		pr_debug("no wq available for cpu=%d\n", cpu);
1624 		return -ENODEV;
1625 	}
1626 
1627 	iaa_wq = idxd_wq_get_private(wq);
1628 
1629 	if (!req->dst) {
1630 		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1631 
1632 		/* incompressible data will always be < 2 * slen */
1633 		req->dlen = 2 * req->slen;
1634 		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1635 		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1636 		if (!req->dst) {
1637 			ret = -ENOMEM;
1638 			order = -1;
1639 			goto out;
1640 		}
1641 		disable_async = true;
1642 	}
1643 
1644 	dev = &wq->idxd->pdev->dev;
1645 
1646 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1647 	if (nr_sgs <= 0 || nr_sgs > 1) {
1648 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1649 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1650 			iaa_wq->wq->id, ret);
1651 		ret = -EIO;
1652 		goto out;
1653 	}
1654 	src_addr = sg_dma_address(req->src);
1655 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1656 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1657 		req->src, req->slen, sg_dma_len(req->src));
1658 
1659 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1660 	if (nr_sgs <= 0 || nr_sgs > 1) {
1661 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1662 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1663 			iaa_wq->wq->id, ret);
1664 		ret = -EIO;
1665 		goto err_map_dst;
1666 	}
1667 	dst_addr = sg_dma_address(req->dst);
1668 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1669 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1670 		req->dst, req->dlen, sg_dma_len(req->dst));
1671 
1672 	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1673 			   &req->dlen, &compression_crc, disable_async);
1674 	if (ret == -EINPROGRESS)
1675 		return ret;
1676 
1677 	if (!ret && compression_ctx->verify_compress) {
1678 		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1679 		if (ret) {
1680 			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1681 			goto out;
1682 		}
1683 
1684 		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1685 					  dst_addr, &req->dlen, compression_crc);
1686 		if (ret)
1687 			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1688 
1689 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1690 		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1691 
1692 		goto out;
1693 	}
1694 
1695 	if (ret)
1696 		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1697 
1698 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1699 err_map_dst:
1700 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1701 out:
1702 	iaa_wq_put(wq);
1703 
1704 	if (order >= 0)
1705 		sgl_free_order(req->dst, order);
1706 
1707 	return ret;
1708 }
1709 
1710 static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1711 {
1712 	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1713 		GFP_KERNEL : GFP_ATOMIC;
1714 	struct crypto_tfm *tfm = req->base.tfm;
1715 	dma_addr_t src_addr, dst_addr;
1716 	int nr_sgs, cpu, ret = 0;
1717 	struct iaa_wq *iaa_wq;
1718 	struct device *dev;
1719 	struct idxd_wq *wq;
1720 	int order = -1;
1721 
1722 	cpu = get_cpu();
1723 	wq = wq_table_next_wq(cpu);
1724 	put_cpu();
1725 	if (!wq) {
1726 		pr_debug("no wq configured for cpu=%d\n", cpu);
1727 		return -ENODEV;
1728 	}
1729 
1730 	ret = iaa_wq_get(wq);
1731 	if (ret) {
1732 		pr_debug("no wq available for cpu=%d\n", cpu);
1733 		return -ENODEV;
1734 	}
1735 
1736 	iaa_wq = idxd_wq_get_private(wq);
1737 
1738 	dev = &wq->idxd->pdev->dev;
1739 
1740 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1741 	if (nr_sgs <= 0 || nr_sgs > 1) {
1742 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1743 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1744 			iaa_wq->wq->id, ret);
1745 		ret = -EIO;
1746 		goto out;
1747 	}
1748 	src_addr = sg_dma_address(req->src);
1749 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1750 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1751 		req->src, req->slen, sg_dma_len(req->src));
1752 
1753 	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1754 alloc_dest:
1755 	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1756 	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1757 	if (!req->dst) {
1758 		ret = -ENOMEM;
1759 		order = -1;
1760 		goto out;
1761 	}
1762 
1763 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1764 	if (nr_sgs <= 0 || nr_sgs > 1) {
1765 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1766 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1767 			iaa_wq->wq->id, ret);
1768 		ret = -EIO;
1769 		goto err_map_dst;
1770 	}
1771 
1772 	dst_addr = sg_dma_address(req->dst);
1773 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775 		req->dst, req->dlen, sg_dma_len(req->dst));
1776 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1777 			     dst_addr, &req->dlen, true);
1778 	if (ret == -EOVERFLOW) {
1779 		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1780 		req->dlen *= 2;
1781 		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1782 			goto err_map_dst;
1783 		goto alloc_dest;
1784 	}
1785 
1786 	if (ret != 0)
1787 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1788 
1789 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1790 err_map_dst:
1791 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1792 out:
1793 	iaa_wq_put(wq);
1794 
1795 	if (order >= 0)
1796 		sgl_free_order(req->dst, order);
1797 
1798 	return ret;
1799 }
1800 
1801 static int iaa_comp_adecompress(struct acomp_req *req)
1802 {
1803 	struct crypto_tfm *tfm = req->base.tfm;
1804 	dma_addr_t src_addr, dst_addr;
1805 	int nr_sgs, cpu, ret = 0;
1806 	struct iaa_wq *iaa_wq;
1807 	struct device *dev;
1808 	struct idxd_wq *wq;
1809 
1810 	if (!iaa_crypto_enabled) {
1811 		pr_debug("iaa_crypto disabled, not decompressing\n");
1812 		return -ENODEV;
1813 	}
1814 
1815 	if (!req->src || !req->slen) {
1816 		pr_debug("invalid src, not decompressing\n");
1817 		return -EINVAL;
1818 	}
1819 
1820 	if (!req->dst)
1821 		return iaa_comp_adecompress_alloc_dest(req);
1822 
1823 	cpu = get_cpu();
1824 	wq = wq_table_next_wq(cpu);
1825 	put_cpu();
1826 	if (!wq) {
1827 		pr_debug("no wq configured for cpu=%d\n", cpu);
1828 		return -ENODEV;
1829 	}
1830 
1831 	ret = iaa_wq_get(wq);
1832 	if (ret) {
1833 		pr_debug("no wq available for cpu=%d\n", cpu);
1834 		return -ENODEV;
1835 	}
1836 
1837 	iaa_wq = idxd_wq_get_private(wq);
1838 
1839 	dev = &wq->idxd->pdev->dev;
1840 
1841 	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1842 	if (nr_sgs <= 0 || nr_sgs > 1) {
1843 		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1844 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1845 			iaa_wq->wq->id, ret);
1846 		ret = -EIO;
1847 		goto out;
1848 	}
1849 	src_addr = sg_dma_address(req->src);
1850 	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1851 		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1852 		req->src, req->slen, sg_dma_len(req->src));
1853 
1854 	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1855 	if (nr_sgs <= 0 || nr_sgs > 1) {
1856 		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1857 			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1858 			iaa_wq->wq->id, ret);
1859 		ret = -EIO;
1860 		goto err_map_dst;
1861 	}
1862 	dst_addr = sg_dma_address(req->dst);
1863 	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1864 		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1865 		req->dst, req->dlen, sg_dma_len(req->dst));
1866 
1867 	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1868 			     dst_addr, &req->dlen, false);
1869 	if (ret == -EINPROGRESS)
1870 		return ret;
1871 
1872 	if (ret != 0)
1873 		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1874 
1875 	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1876 err_map_dst:
1877 	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1878 out:
1879 	iaa_wq_put(wq);
1880 
1881 	return ret;
1882 }
1883 
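/*
 * Copy the current driver-level tunables (verify_compress and the
 * sync/async/irq mode) into the per-tfm compression context when a
 * transform is initialized.
 */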
1884 static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1885 {
1886 	ctx->verify_compress = iaa_verify_compress;
1887 	ctx->async_mode = async_mode;
1888 	ctx->use_irq = use_irq;
1889 }
1890 
1891 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1892 {
1893 	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1894 	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1895 
1896 	compression_ctx_init(ctx);
1897 
1898 	ctx->mode = IAA_MODE_FIXED;
1899 
1900 	return 0;
1901 }
1902 
1903 static void dst_free(struct scatterlist *sgl)
1904 {
1905 	/*
1906 	 * Nothing to do here: when req->dst is NULL the driver allocates the
1907 	 * destination itself and frees it with sgl_free_order() elsewhere.
1908 	 */
1909 }
1910 
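/*
 * Fixed-mode deflate exposed through the acomp API.  IAA_ALG_PRIORITY
 * makes this implementation preferred over lower-priority software
 * "deflate" providers when callers request the algorithm by its
 * generic name.
 */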
1911 static struct acomp_alg iaa_acomp_fixed_deflate = {
1912 	.init			= iaa_comp_init_fixed,
1913 	.compress		= iaa_comp_acompress,
1914 	.decompress		= iaa_comp_adecompress,
1915 	.dst_free               = dst_free,
1916 	.base			= {
1917 		.cra_name		= "deflate",
1918 		.cra_driver_name	= "deflate-iaa",
1919 		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
1920 		.cra_module		= THIS_MODULE,
1921 		.cra_priority		= IAA_ALG_PRIORITY,
1922 	}
1923 };
1924 
1925 static int iaa_register_compression_device(void)
1926 {
1927 	int ret;
1928 
1929 	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1930 	if (ret) {
1931 		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1932 		goto out;
1933 	}
1934 
1935 	iaa_crypto_registered = true;
1936 out:
1937 	return ret;
1938 }
1939 
1940 static int iaa_unregister_compression_device(void)
1941 {
1942 	if (iaa_crypto_registered)
1943 		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1944 
1945 	return 0;
1946 }
1947 
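/*
 * idxd sub-driver probe: accept only enabled IAX devices, claim the wq
 * as a kernel-type wq, enable it, record it with save_iaa_wq() and
 * rebalance the per-cpu wq table.  The acomp algorithm is registered
 * (and iaa_crypto_enabled set) when the first wq is bound.
 */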
1948 static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1949 {
1950 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1951 	struct idxd_device *idxd = wq->idxd;
1952 	struct idxd_driver_data *data = idxd->data;
1953 	struct device *dev = &idxd_dev->conf_dev;
1954 	bool first_wq = false;
1955 	int ret = 0;
1956 
1957 	if (idxd->state != IDXD_DEV_ENABLED)
1958 		return -ENXIO;
1959 
1960 	if (data->type != IDXD_TYPE_IAX)
1961 		return -ENODEV;
1962 
1963 	mutex_lock(&wq->wq_lock);
1964 
1965 	if (idxd_wq_get_private(wq)) {
1966 		mutex_unlock(&wq->wq_lock);
1967 		return -EBUSY;
1968 	}
1969 
1970 	if (!idxd_wq_driver_name_match(wq, dev)) {
1971 		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1972 			idxd->id, wq->id, wq->driver_name, dev->driver->name);
1973 		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1974 		ret = -ENODEV;
1975 		goto err;
1976 	}
1977 
1978 	wq->type = IDXD_WQT_KERNEL;
1979 
1980 	ret = idxd_drv_enable_wq(wq);
1981 	if (ret < 0) {
1982 		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1983 			idxd->id, wq->id, ret);
1984 		ret = -ENXIO;
1985 		goto err;
1986 	}
1987 
1988 	mutex_lock(&iaa_devices_lock);
1989 
1990 	if (list_empty(&iaa_devices)) {
1991 		ret = alloc_wq_table(wq->idxd->max_wqs);
1992 		if (ret)
1993 			goto err_alloc;
1994 		first_wq = true;
1995 	}
1996 
1997 	ret = save_iaa_wq(wq);
1998 	if (ret)
1999 		goto err_save;
2000 
2001 	rebalance_wq_table();
2002 
2003 	if (first_wq) {
2004 		iaa_crypto_enabled = true;
2005 		ret = iaa_register_compression_device();
2006 		if (ret != 0) {
2007 			iaa_crypto_enabled = false;
2008 			dev_dbg(dev, "IAA compression device registration failed\n");
2009 			goto err_register;
2010 		}
2011 		try_module_get(THIS_MODULE);
2012 
2013 		pr_info("iaa_crypto now ENABLED\n");
2014 	}
2015 
2016 	mutex_unlock(&iaa_devices_lock);
2017 out:
2018 	mutex_unlock(&wq->wq_lock);
2019 
2020 	return ret;
2021 
2022 err_register:
2023 	remove_iaa_wq(wq);
2024 	free_iaa_wq(idxd_wq_get_private(wq));
2025 err_save:
2026 	if (first_wq)
2027 		free_wq_table();
2028 err_alloc:
2029 	mutex_unlock(&iaa_devices_lock);
2030 	idxd_drv_disable_wq(wq);
2031 err:
2032 	wq->type = IDXD_WQT_NONE;
2033 
2034 	goto out;
2035 }
2036 
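/*
 * idxd sub-driver remove: quiesce the wq and take it out of the wq
 * table.  If descriptors still hold a reference, the iaa_wq is only
 * marked for removal and left to be freed once its references are
 * dropped; otherwise it is freed here.  When the last IAA wq goes
 * away, iaa_crypto_enabled is cleared and the wq table is freed, but
 * the acomp algorithm stays registered until module exit.
 */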
2037 static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
2038 {
2039 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
2040 	struct idxd_device *idxd = wq->idxd;
2041 	struct iaa_wq *iaa_wq;
2042 	bool free = false;
2043 
2044 	idxd_wq_quiesce(wq);
2045 
2046 	mutex_lock(&wq->wq_lock);
2047 	mutex_lock(&iaa_devices_lock);
2048 
2049 	remove_iaa_wq(wq);
2050 
2051 	spin_lock(&idxd->dev_lock);
2052 	iaa_wq = idxd_wq_get_private(wq);
2053 	if (!iaa_wq) {
2054 		spin_unlock(&idxd->dev_lock);
2055 		pr_err("%s: no iaa_wq available to remove\n", __func__);
2056 		goto out;
2057 	}
2058 
2059 	if (iaa_wq->ref) {
2060 		iaa_wq->remove = true;
2061 	} else {
2062 		wq = iaa_wq->wq;
2063 		idxd_wq_set_private(wq, NULL);
2064 		free = true;
2065 	}
2066 	spin_unlock(&idxd->dev_lock);
2067 	if (free) {
2068 		__free_iaa_wq(iaa_wq);
2069 		kfree(iaa_wq);
2070 	}
2071 
2072 	idxd_drv_disable_wq(wq);
2073 	rebalance_wq_table();
2074 
2075 	if (nr_iaa == 0) {
2076 		iaa_crypto_enabled = false;
2077 		free_wq_table();
2078 		module_put(THIS_MODULE);
2079 
2080 		pr_info("iaa_crypto now DISABLED\n");
2081 	}
2082 out:
2083 	mutex_unlock(&iaa_devices_lock);
2084 	mutex_unlock(&wq->wq_lock);
2085 }
2086 
2087 static enum idxd_dev_type dev_types[] = {
2088 	IDXD_DEV_WQ,
2089 	IDXD_DEV_NONE,
2090 };
2091 
2092 static struct idxd_device_driver iaa_crypto_driver = {
2093 	.probe = iaa_crypto_probe,
2094 	.remove = iaa_crypto_remove,
2095 	.name = IDXD_SUBDRIVER_NAME,
2096 	.type = dev_types,
2097 	.desc_complete = iaa_desc_complete,
2098 };
2099 
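/*
 * Module init: derive the cpu/node counts used to balance wqs across
 * cpus, allocate the software deflate-generic tfm the driver requires
 * (init fails without it), set up the fixed compression mode (AECS)
 * tables, then register the idxd sub-driver along with its sysfs
 * attributes and debugfs stats.
 */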
2100 static int __init iaa_crypto_init_module(void)
2101 {
2102 	int ret = 0;
2103 	int node;
2104 
2105 	nr_cpus = num_online_cpus();
2106 	for_each_node_with_cpus(node)
2107 		nr_nodes++;
2108 	if (!nr_nodes) {
2109 		pr_err("IAA couldn't find any nodes with cpus\n");
2110 		return -ENODEV;
2111 	}
2112 	nr_cpus_per_node = nr_cpus / nr_nodes;
2113 
2114 	if (crypto_has_comp("deflate-generic", 0, 0))
2115 		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2116 
2117 	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2118 		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2119 		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
2120 		return -ENOMEM;
2121 	}
2122 
2123 	ret = iaa_aecs_init_fixed();
2124 	if (ret < 0) {
2125 		pr_debug("IAA fixed compression mode init failed\n");
2126 		goto err_aecs_init;
2127 	}
2128 
2129 	ret = idxd_driver_register(&iaa_crypto_driver);
2130 	if (ret) {
2131 		pr_debug("IAA wq sub-driver registration failed\n");
2132 		goto err_driver_reg;
2133 	}
2134 
2135 	ret = driver_create_file(&iaa_crypto_driver.drv,
2136 				 &driver_attr_verify_compress);
2137 	if (ret) {
2138 		pr_debug("IAA verify_compress attr creation failed\n");
2139 		goto err_verify_attr_create;
2140 	}
2141 
2142 	ret = driver_create_file(&iaa_crypto_driver.drv,
2143 				 &driver_attr_sync_mode);
2144 	if (ret) {
2145 		pr_debug("IAA sync mode attr creation failed\n");
2146 		goto err_sync_attr_create;
2147 	}
2148 
2149 	if (iaa_crypto_debugfs_init())
2150 		pr_warn("debugfs init failed, stats not available\n");
2151 
2152 	pr_debug("initialized\n");
2153 out:
2154 	return ret;
2155 
2156 err_sync_attr_create:
2157 	driver_remove_file(&iaa_crypto_driver.drv,
2158 			   &driver_attr_verify_compress);
2159 err_verify_attr_create:
2160 	idxd_driver_unregister(&iaa_crypto_driver);
2161 err_driver_reg:
2162 	iaa_aecs_cleanup_fixed();
2163 err_aecs_init:
2164 	crypto_free_comp(deflate_generic_tfm);
2165 
2166 	goto out;
2167 }
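/*
 * Module exit: unregister the acomp algorithm if it is still
 * registered, then unwind the rest of init in reverse order: debugfs,
 * driver attributes, the idxd sub-driver, the fixed AECS tables and
 * the software deflate tfm.
 */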
2168 
2169 static void __exit iaa_crypto_cleanup_module(void)
2170 {
2171 	if (iaa_unregister_compression_device())
2172 		pr_debug("IAA compression device unregister failed\n");
2173 
2174 	iaa_crypto_debugfs_cleanup();
2175 	driver_remove_file(&iaa_crypto_driver.drv,
2176 			   &driver_attr_sync_mode);
2177 	driver_remove_file(&iaa_crypto_driver.drv,
2178 			   &driver_attr_verify_compress);
2179 	idxd_driver_unregister(&iaa_crypto_driver);
2180 	iaa_aecs_cleanup_fixed();
2181 	crypto_free_comp(deflate_generic_tfm);
2182 
2183 	pr_debug("cleaned up\n");
2184 }
2185 
2186 MODULE_IMPORT_NS(IDXD);
2187 MODULE_LICENSE("GPL");
2188 MODULE_ALIAS_IDXD_DEVICE(0);
2189 MODULE_AUTHOR("Intel Corporation");
2190 MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2191 
2192 module_init(iaa_crypto_init_module);
2193 module_exit(iaa_crypto_cleanup_module);
2194