xref: /linux/drivers/dma/idxd/init.c (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
3 #include <linux/init.h>
4 #include <linux/kernel.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/pci.h>
8 #include <linux/interrupt.h>
9 #include <linux/delay.h>
10 #include <linux/dma-mapping.h>
11 #include <linux/workqueue.h>
12 #include <linux/fs.h>
13 #include <linux/io-64-nonatomic-lo-hi.h>
14 #include <linux/device.h>
15 #include <linux/idr.h>
16 #include <linux/iommu.h>
17 #include <uapi/linux/idxd.h>
18 #include <linux/dmaengine.h>
19 #include "../dmaengine.h"
20 #include "registers.h"
21 #include "idxd.h"
22 #include "perfmon.h"
23 
MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_DESCRIPTION("Intel Data Streaming Accelerator and In-Memory Analytics Accelerator common driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_IMPORT_NS("IDXD");

/* Shared Virtual Addressing (PASID) support; enabled by default, can be
 * forced off at module load (see idxd_probe()).
 */
static bool sva = true;
module_param(sva, bool, 0644);
MODULE_PARM_DESC(sva, "Toggle SVA support on/off");

/*
 * When set, groups keep traffic class values of -1 instead of the
 * version-dependent defaults (see idxd_setup_groups()).
 */
bool tc_override;
module_param(tc_override, bool, 0644);
MODULE_PARM_DESC(tc_override, "Override traffic class defaults");

#define DRV_NAME "idxd"

/* NOTE(review): set elsewhere (not in this chunk) when the CPU supports
 * ENQCMD — confirm against the arch init code.
 */
bool support_enqcmd;
/* Allocator for unique per-device ids used in sysfs names ("dsa0", "iax1"). */
DEFINE_IDA(idxd_ida);
42 
/*
 * Per-device-type driver data, indexed by IDXD_TYPE_*.  Captures the
 * completion record layout, event log entry offsets, and the sysfs
 * device type for DSA vs IAX devices.
 */
static struct idxd_driver_data idxd_driver_data[] = {
	[IDXD_TYPE_DSA] = {
		.name_prefix = "dsa",
		.type = IDXD_TYPE_DSA,
		.compl_size = sizeof(struct dsa_completion_record),
		.align = 32,
		.dev_type = &dsa_device_type,
		.evl_cr_off = offsetof(struct dsa_evl_entry, cr),
		.user_submission_safe = false, /* See INTEL-SA-01084 security advisory */
		.cr_status_off = offsetof(struct dsa_completion_record, status),
		.cr_result_off = offsetof(struct dsa_completion_record, result),
	},
	[IDXD_TYPE_IAX] = {
		.name_prefix = "iax",
		.type = IDXD_TYPE_IAX,
		.compl_size = sizeof(struct iax_completion_record),
		.align = 64,
		.dev_type = &iax_device_type,
		.evl_cr_off = offsetof(struct iax_evl_entry, cr),
		.user_submission_safe = false, /* See INTEL-SA-01084 security advisory */
		.cr_status_off = offsetof(struct iax_completion_record, status),
		.cr_result_off = offsetof(struct iax_completion_record, error_code),
		.load_device_defaults = idxd_load_iaa_device_defaults,
	},
};
68 
/*
 * PCI IDs this driver binds to; driver_data points at the matching
 * idxd_driver_data entry for the device type.
 */
static struct pci_device_id idxd_pci_tbl[] = {
	/* DSA ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },
	/* DSA on GNR-D platforms */
	{ PCI_DEVICE_DATA(INTEL, DSA_GNRD, &idxd_driver_data[IDXD_TYPE_DSA]) },
	/* DSA on DMR platforms */
	{ PCI_DEVICE_DATA(INTEL, DSA_DMR, &idxd_driver_data[IDXD_TYPE_DSA]) },

	/* IAX ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
	/* IAA on DMR platforms */
	{ PCI_DEVICE_DATA(INTEL, IAA_DMR, &idxd_driver_data[IDXD_TYPE_IAX]) },
	/* IAA PTL platforms */
	{ PCI_DEVICE_DATA(INTEL, IAA_PTL, &idxd_driver_data[IDXD_TYPE_IAX]) },
	/* IAA WCL platforms */
	{ PCI_DEVICE_DATA(INTEL, IAA_WCL, &idxd_driver_data[IDXD_TYPE_IAX]) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
88 
/*
 * Allocate all MSI-X vectors for the device and request the misc
 * (device-level error/admin) interrupt on vector 0.  The per-wq
 * interrupt entries (vectors 1..max_wqs) are only initialized here;
 * their irqs are not requested — NOTE(review): presumably requested
 * when a wq is enabled; confirm against the wq enable path.
 *
 * Returns 0 on success or a negative errno.
 */
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct idxd_irq_entry *ie;
	int i, msixcnt;
	int rc = 0;

	msixcnt = pci_msix_vec_count(pdev);
	if (msixcnt < 0) {
		dev_err(dev, "Not MSI-X interrupt capable.\n");
		return -ENOSPC;
	}
	idxd->irq_cnt = msixcnt;

	/* min == max: require the full vector count or fail outright. */
	rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
	if (rc != msixcnt) {
		dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
		return -ENOSPC;
	}
	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);


	/* Vector 0 carries device-level (misc) events. */
	ie = idxd_get_ie(idxd, 0);
	ie->vector = pci_irq_vector(pdev, 0);
	rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
	if (rc < 0) {
		dev_err(dev, "Failed to allocate misc interrupt.\n");
		goto err_misc_irq;
	}
	dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);

	/* Initialize (but do not request) the per-wq interrupt entries. */
	for (i = 0; i < idxd->max_wqs; i++) {
		int msix_idx = i + 1;

		ie = idxd_get_ie(idxd, msix_idx);
		ie->id = msix_idx;
		ie->int_handle = INVALID_INT_HANDLE;
		ie->pasid = IOMMU_PASID_INVALID;

		spin_lock_init(&ie->list_lock);
		init_llist_head(&ie->pending_llist);
		INIT_LIST_HEAD(&ie->work_list);
	}

	idxd_unmask_error_interrupts(idxd);
	return 0;

 err_misc_irq:
	/* Mask error interrupts before tearing down the vectors. */
	idxd_mask_error_interrupts(idxd);
	pci_free_irq_vectors(pdev);
	dev_err(dev, "No usable interrupts\n");
	return rc;
}
143 
144 static void idxd_cleanup_interrupts(struct idxd_device *idxd)
145 {
146 	struct pci_dev *pdev = idxd->pdev;
147 	struct idxd_irq_entry *ie;
148 	int msixcnt;
149 
150 	msixcnt = pci_msix_vec_count(pdev);
151 	if (msixcnt <= 0)
152 		return;
153 
154 	ie = idxd_get_ie(idxd, 0);
155 	idxd_mask_error_interrupts(idxd);
156 	free_irq(ie->vector, ie);
157 	pci_free_irq_vectors(pdev);
158 }
159 
160 static void idxd_clean_wqs(struct idxd_device *idxd)
161 {
162 	struct idxd_wq *wq;
163 	struct device *conf_dev;
164 	int i;
165 
166 	for (i = 0; i < idxd->max_wqs; i++) {
167 		wq = idxd->wqs[i];
168 		if (idxd->hw.wq_cap.op_config)
169 			bitmap_free(wq->opcap_bmap);
170 		kfree(wq->wqcfg);
171 		conf_dev = wq_confdev(wq);
172 		put_device(conf_dev);
173 		kfree(wq);
174 	}
175 	bitmap_free(idxd->wq_enable_map);
176 	kfree(idxd->wqs);
177 }
178 
/*
 * Allocate and initialize all work queues for the device: the wq
 * pointer array, the enable bitmap, and for each wq its conf device,
 * locks, defaults, WQCFG buffer, and (when supported) opcap bitmap.
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated so
 * far is unwound.
 */
static int idxd_setup_wqs(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_wq *wq;
	struct device *conf_dev;
	int i, rc;

	idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
				 GFP_KERNEL, dev_to_node(dev));
	if (!idxd->wqs)
		return -ENOMEM;

	idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev));
	if (!idxd->wq_enable_map) {
		rc = -ENOMEM;
		goto err_free_wqs;
	}

	for (i = 0; i < idxd->max_wqs; i++) {
		wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
		if (!wq) {
			rc = -ENOMEM;
			goto err_unwind;
		}

		/* Set up the sysfs conf device ("wqX.Y") for this wq. */
		idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
		conf_dev = wq_confdev(wq);
		wq->id = i;
		wq->idxd = idxd;
		device_initialize(conf_dev);
		conf_dev->parent = idxd_confdev(idxd);
		conf_dev->bus = &dsa_bus_type;
		conf_dev->type = &idxd_wq_device_type;
		rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
		if (rc < 0) {
			/* Current wq freed here; err_unwind handles 0..i-1. */
			put_device(conf_dev);
			kfree(wq);
			goto err_unwind;
		}

		mutex_init(&wq->wq_lock);
		init_waitqueue_head(&wq->err_queue);
		init_completion(&wq->wq_dead);
		init_completion(&wq->wq_resurrect);
		/* Default limits; sysfs may adjust them later. */
		wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
		idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
		idxd_wq_set_init_max_sgl_size(idxd, wq);
		wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
		if (!wq->wqcfg) {
			put_device(conf_dev);
			kfree(wq);
			rc = -ENOMEM;
			goto err_unwind;
		}

		/* Per-wq operation capability mask, seeded from the device's. */
		if (idxd->hw.wq_cap.op_config) {
			wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
			if (!wq->opcap_bmap) {
				kfree(wq->wqcfg);
				put_device(conf_dev);
				kfree(wq);
				rc = -ENOMEM;
				goto err_unwind;
			}
			bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
		}
		mutex_init(&wq->uc_lock);
		xa_init(&wq->upasid_xa);
		idxd->wqs[i] = wq;
	}

	return 0;

err_unwind:
	/* Free the fully-constructed wqs from earlier iterations. */
	while (--i >= 0) {
		wq = idxd->wqs[i];
		if (idxd->hw.wq_cap.op_config)
			bitmap_free(wq->opcap_bmap);
		kfree(wq->wqcfg);
		conf_dev = wq_confdev(wq);
		put_device(conf_dev);
		kfree(wq);
	}
	bitmap_free(idxd->wq_enable_map);

err_free_wqs:
	kfree(idxd->wqs);

	return rc;
}
270 
271 static void idxd_clean_engines(struct idxd_device *idxd)
272 {
273 	struct idxd_engine *engine;
274 	struct device *conf_dev;
275 	int i;
276 
277 	for (i = 0; i < idxd->max_engines; i++) {
278 		engine = idxd->engines[i];
279 		conf_dev = engine_confdev(engine);
280 		put_device(conf_dev);
281 		kfree(engine);
282 	}
283 	kfree(idxd->engines);
284 }
285 
286 static int idxd_setup_engines(struct idxd_device *idxd)
287 {
288 	struct idxd_engine *engine;
289 	struct device *dev = &idxd->pdev->dev;
290 	struct device *conf_dev;
291 	int i, rc;
292 
293 	idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
294 				     GFP_KERNEL, dev_to_node(dev));
295 	if (!idxd->engines)
296 		return -ENOMEM;
297 
298 	for (i = 0; i < idxd->max_engines; i++) {
299 		engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
300 		if (!engine) {
301 			rc = -ENOMEM;
302 			goto err;
303 		}
304 
305 		idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
306 		conf_dev = engine_confdev(engine);
307 		engine->id = i;
308 		engine->idxd = idxd;
309 		device_initialize(conf_dev);
310 		conf_dev->parent = idxd_confdev(idxd);
311 		conf_dev->bus = &dsa_bus_type;
312 		conf_dev->type = &idxd_engine_device_type;
313 		rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
314 		if (rc < 0) {
315 			put_device(conf_dev);
316 			kfree(engine);
317 			goto err;
318 		}
319 
320 		idxd->engines[i] = engine;
321 	}
322 
323 	return 0;
324 
325  err:
326 	while (--i >= 0) {
327 		engine = idxd->engines[i];
328 		conf_dev = engine_confdev(engine);
329 		put_device(conf_dev);
330 		kfree(engine);
331 	}
332 	kfree(idxd->engines);
333 
334 	return rc;
335 }
336 
337 static void idxd_clean_groups(struct idxd_device *idxd)
338 {
339 	struct idxd_group *group;
340 	int i;
341 
342 	for (i = 0; i < idxd->max_groups; i++) {
343 		group = idxd->groups[i];
344 		put_device(group_confdev(group));
345 		kfree(group);
346 	}
347 	kfree(idxd->groups);
348 }
349 
/*
 * Allocate and initialize all groups for the device, each with a sysfs
 * conf device named "groupX.Y", and apply default traffic class and
 * read buffer settings.
 *
 * Returns 0 on success or a negative errno; partially constructed state
 * is unwound on failure.
 */
static int idxd_setup_groups(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct device *conf_dev;
	struct idxd_group *group;
	int i, rc;

	idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
				    GFP_KERNEL, dev_to_node(dev));
	if (!idxd->groups)
		return -ENOMEM;

	for (i = 0; i < idxd->max_groups; i++) {
		group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
		if (!group) {
			rc = -ENOMEM;
			goto err;
		}

		idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
		conf_dev = group_confdev(group);
		group->id = i;
		group->idxd = idxd;
		device_initialize(conf_dev);
		conf_dev->parent = idxd_confdev(idxd);
		conf_dev->bus = &dsa_bus_type;
		conf_dev->type = &idxd_group_device_type;
		rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
		if (rc < 0) {
			put_device(conf_dev);
			kfree(group);
			goto err;
		}

		idxd->groups[i] = group;
		/*
		 * Devices at or below version 2 default both traffic
		 * classes to 1 unless the tc_override module parameter
		 * asks to leave them unset (-1).
		 */
		if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
			group->tc_a = 1;
			group->tc_b = 1;
		} else {
			group->tc_a = -1;
			group->tc_b = -1;
		}
		/*
		 * The default value is the same as the value of
		 * total read buffers in GRPCAP.
		 */
		group->rdbufs_allowed = idxd->max_rdbufs;
	}

	return 0;

 err:
	/* Undo the groups constructed in earlier iterations. */
	while (--i >= 0) {
		group = idxd->groups[i];
		put_device(group_confdev(group));
		kfree(group);
	}
	kfree(idxd->groups);

	return rc;
}
411 
/*
 * Tear down the structures built by idxd_setup_internals() in reverse
 * order of creation.  NOTE(review): the event log (idxd->evl /
 * evl_cache) allocated by idxd_init_evl() is not released here —
 * confirm it is freed elsewhere.
 */
static void idxd_cleanup_internals(struct idxd_device *idxd)
{
	idxd_clean_groups(idxd);
	idxd_clean_engines(idxd);
	idxd_clean_wqs(idxd);
	destroy_workqueue(idxd->wq);
}
419 
420 static int idxd_init_evl(struct idxd_device *idxd)
421 {
422 	struct device *dev = &idxd->pdev->dev;
423 	unsigned int evl_cache_size;
424 	struct idxd_evl *evl;
425 	const char *idxd_name;
426 
427 	if (idxd->hw.gen_cap.evl_support == 0)
428 		return 0;
429 
430 	evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev));
431 	if (!evl)
432 		return -ENOMEM;
433 
434 	mutex_init(&evl->lock);
435 	evl->size = IDXD_EVL_SIZE_MIN;
436 
437 	idxd_name = dev_name(idxd_confdev(idxd));
438 	evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd);
439 	/*
440 	 * Since completion record in evl_cache will be copied to user
441 	 * when handling completion record page fault, need to create
442 	 * the cache suitable for user copy.
443 	 */
444 	idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size,
445 						     0, 0, 0, evl_cache_size,
446 						     NULL);
447 	if (!idxd->evl_cache) {
448 		kfree(evl);
449 		return -ENOMEM;
450 	}
451 
452 	idxd->evl = evl;
453 	return 0;
454 }
455 
/*
 * Build all software-side structures for the device: wqs, engines,
 * groups, the driver's workqueue, and the event log.  Unwinds in
 * reverse on any failure.
 *
 * Returns 0 on success or a negative errno.
 */
static int idxd_setup_internals(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int rc;

	init_waitqueue_head(&idxd->cmd_waitq);

	rc = idxd_setup_wqs(idxd);
	if (rc < 0)
		goto err_wqs;

	rc = idxd_setup_engines(idxd);
	if (rc < 0)
		goto err_engine;

	rc = idxd_setup_groups(idxd);
	if (rc < 0)
		goto err_group;

	/* Driver-private workqueue, named after the conf device. */
	idxd->wq = create_workqueue(dev_name(dev));
	if (!idxd->wq) {
		rc = -ENOMEM;
		goto err_wkq_create;
	}

	rc = idxd_init_evl(idxd);
	if (rc < 0)
		goto err_evl;

	return 0;

 err_evl:
	destroy_workqueue(idxd->wq);
 err_wkq_create:
	idxd_clean_groups(idxd);
 err_group:
	idxd_clean_engines(idxd);
 err_engine:
	idxd_clean_wqs(idxd);
 err_wqs:
	return rc;
}
498 
499 static void idxd_read_table_offsets(struct idxd_device *idxd)
500 {
501 	union offsets_reg offsets;
502 	struct device *dev = &idxd->pdev->dev;
503 
504 	offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
505 	offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64));
506 	idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT;
507 	dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
508 	idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT;
509 	dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset);
510 	idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT;
511 	dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset);
512 	idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT;
513 	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
514 }
515 
516 void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
517 {
518 	int i, j, nr;
519 
520 	for (i = 0, nr = 0; i < count; i++) {
521 		for (j = 0; j < BITS_PER_LONG_LONG; j++) {
522 			if (val[i] & BIT(j))
523 				set_bit(nr, bmap);
524 			nr++;
525 		}
526 	}
527 }
528 
/*
 * Read and cache the device capability registers (general, command,
 * group, engine, wq, operation, and version-gated extras), deriving the
 * driver's max_* limits from them.
 */
static void idxd_read_caps(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int i;

	/* reading generic capabilities */
	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);

	/* The command capability register only exists when gen_cap says so. */
	if (idxd->hw.gen_cap.cmd_cap) {
		idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
		dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
	}

	/* reading command capabilities */
	if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
		idxd->request_int_handles = true;

	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
	idxd_set_max_batch_size(idxd->data->type, idxd, 1U << idxd->hw.gen_cap.max_batch_shift);
	dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
	if (idxd->hw.gen_cap.config_en)
		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);

	/* reading group capabilities */
	idxd->hw.group_cap.bits =
		ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
	dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
	idxd->max_groups = idxd->hw.group_cap.num_groups;
	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
	idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
	dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
	idxd->nr_rdbufs = idxd->max_rdbufs;

	/* read engine capabilities */
	idxd->hw.engine_cap.bits =
		ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
	dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
	idxd->max_engines = idxd->hw.engine_cap.num_engines;
	dev_dbg(dev, "max engines: %u\n", idxd->max_engines);

	/* read workqueue capabilities */
	idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
	dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
	idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
	idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
	dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);

	/* reading operation capabilities: four 64-bit words into a bitmap */
	for (i = 0; i < 4; i++) {
		idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
				IDXD_OPCAP_OFFSET + i * sizeof(u64));
		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
	}
	multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);

	/* DSACAP registers exist from device version 3 onward. */
	if (idxd->hw.version >= DEVICE_VERSION_3) {
		idxd->hw.dsacap0.bits = ioread64(idxd->reg_base + IDXD_DSACAP0_OFFSET);
		idxd->hw.dsacap1.bits = ioread64(idxd->reg_base + IDXD_DSACAP1_OFFSET);
		idxd->hw.dsacap2.bits = ioread64(idxd->reg_base + IDXD_DSACAP2_OFFSET);
	}
	if (idxd_sgl_supported(idxd)) {
		idxd->max_sgl_size = 1U << idxd->hw.dsacap0.max_sgl_shift;
		dev_dbg(dev, "max sgl size: %u\n", idxd->max_sgl_size);
	}

	/* read iaa cap */
	if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2)
		idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET);
}
603 
/*
 * Release a device allocated by idxd_alloc(), mirroring its error
 * unwind: drop the conf device reference, free the opcap bitmap,
 * return the id to the IDA, and free the struct.  Safe on NULL.
 */
static void idxd_free(struct idxd_device *idxd)
{
	if (!idxd)
		return;

	put_device(idxd_confdev(idxd));
	bitmap_free(idxd->opcap_bmap);
	ida_free(&idxd_ida, idxd->id);
	kfree(idxd);
}
614 
/*
 * Allocate and minimally initialize an idxd_device: id from the IDA,
 * opcap bitmap, and an initialized conf device named
 * "<prefix><id>" (e.g. "dsa0").  Returns NULL on any failure, with all
 * partial allocations released.
 */
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
	struct device *dev = &pdev->dev;
	struct device *conf_dev;
	struct idxd_device *idxd;
	int rc;

	idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
	if (!idxd)
		return NULL;

	conf_dev = idxd_confdev(idxd);
	idxd->pdev = pdev;
	idxd->data = data;
	idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
	if (idxd->id < 0)
		goto err_ida;

	idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev));
	if (!idxd->opcap_bmap)
		goto err_opcap;

	device_initialize(conf_dev);
	conf_dev->parent = dev;
	conf_dev->bus = &dsa_bus_type;
	conf_dev->type = idxd->data->dev_type;
	rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
	if (rc < 0)
		goto err_name;

	spin_lock_init(&idxd->dev_lock);
	spin_lock_init(&idxd->cmd_lock);

	return idxd;

err_name:
	/* conf_dev was initialized above; put_device drops that reference. */
	put_device(conf_dev);
	bitmap_free(idxd->opcap_bmap);
err_opcap:
	ida_free(&idxd_ida, idxd->id);
err_ida:
	kfree(idxd);

	return NULL;
}
661 
/*
 * Allocate a global PASID and attach it to the device's default DMA
 * domain so the kernel can submit work via ENQCMDS on DMA-API-mapped
 * buffers.  On success, idxd->pasid is set and user interrupts are
 * enabled; on failure the PASID is released and an errno returned.
 */
static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iommu_domain *domain;
	ioasid_t pasid;
	int ret;

	/*
	 * Attach a global PASID to the DMA domain so that we can use ENQCMDS
	 * to submit work on buffers mapped by DMA API.
	 */
	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return -EPERM;

	pasid = iommu_alloc_global_pasid(dev);
	if (pasid == IOMMU_PASID_INVALID)
		return -ENOSPC;

	/*
	 * DMA domain is owned by the driver, it should support all valid
	 * types such as DMA-FQ, identity, etc.
	 */
	ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
	if (ret) {
		dev_err(dev, "failed to attach device pasid %d, domain type %d",
			pasid, domain->type);
		iommu_free_global_pasid(pasid);
		return ret;
	}

	/* Since we set user privilege for kernel DMA, enable completion IRQ */
	idxd_set_user_intr(idxd, 1);
	idxd->pasid = pasid;

	return ret;
}
700 
/*
 * Undo idxd_enable_system_pasid(): detach the PASID from the device's
 * domain, release it, disable user interrupts, and mark the cached
 * pasid invalid.  A no-op if the device has no IOMMU domain.
 */
static void idxd_disable_system_pasid(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iommu_domain *domain;

	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return;

	/* Detach before freeing so the PASID is never free while attached. */
	iommu_detach_device_pasid(domain, dev, idxd->pasid);
	iommu_free_global_pasid(idxd->pasid);

	idxd_set_user_intr(idxd, 0);
	/* Clear the SVA handle as well; the PASID is no longer valid. */
	idxd->sva = NULL;
	idxd->pasid = IOMMU_PASID_INVALID;
}
718 
/*
 * Device-level probe: reset the hardware, set up PASID/SVA if enabled,
 * read capabilities and table offsets, build software structures, load
 * read-only configs when the device is not configurable, and set up
 * interrupts.  Perfmon failure is non-fatal.
 *
 * Returns 0 on success or a negative errno.
 */
static int idxd_probe(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	int rc;

	dev_dbg(dev, "%s entered and resetting device\n", __func__);
	rc = idxd_device_init_reset(idxd);
	if (rc < 0)
		return rc;

	dev_dbg(dev, "IDXD reset complete\n");

	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
		set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);

		/* Kernel PASID failure is tolerated; user PASID stays enabled. */
		rc = idxd_enable_system_pasid(idxd);
		if (rc)
			dev_warn(dev, "No in-kernel DMA with PASID. %d\n", rc);
		else
			set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
	} else if (!sva) {
		dev_warn(dev, "User forced SVA off via module param.\n");
	}

	idxd_read_caps(idxd);
	idxd_read_table_offsets(idxd);

	rc = idxd_setup_internals(idxd);
	if (rc)
		goto err;

	/* If the configs are readonly, then load them from device */
	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
		dev_dbg(dev, "Loading RO device config\n");
		rc = idxd_device_load_config(idxd);
		if (rc < 0)
			goto err_config;
	}

	rc = idxd_setup_interrupts(idxd);
	if (rc)
		goto err_config;

	idxd->major = idxd_cdev_get_major(idxd);

	/* Perfmon is best-effort; the device works without a PMU. */
	rc = perfmon_pmu_init(idxd);
	if (rc < 0)
		dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);

	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
	return 0;

 err_config:
	idxd_cleanup_internals(idxd);
 err:
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	return rc;
}
779 
/*
 * Reverse of idxd_probe(): remove the PMU, free interrupts, tear down
 * internal structures, and disable the system PASID if it was enabled.
 */
static void idxd_cleanup(struct idxd_device *idxd)
{
	perfmon_pmu_remove(idxd);
	idxd_cleanup_interrupts(idxd);
	idxd_cleanup_internals(idxd);
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
}
788 
789 /*
790  * Attach IDXD device to IDXD driver.
791  */
792 static int idxd_bind(struct device_driver *drv, const char *buf)
793 {
794 	const struct bus_type *bus = drv->bus;
795 	struct device *dev;
796 	int err = -ENODEV;
797 
798 	dev = bus_find_device_by_name(bus, NULL, buf);
799 	if (dev)
800 		err = device_driver_attach(drv, dev);
801 
802 	put_device(dev);
803 
804 	return err;
805 }
806 
807 /*
808  * Detach IDXD device from driver.
809  */
810 static void idxd_unbind(struct device_driver *drv, const char *buf)
811 {
812 	const struct bus_type *bus = drv->bus;
813 	struct device *dev;
814 
815 	dev = bus_find_device_by_name(bus, NULL, buf);
816 	if (dev && dev->driver == drv)
817 		device_release_driver(dev);
818 
819 	put_device(dev);
820 }
821 
/*
 * Free the elements of an array of saved configuration structs (the
 * array itself is not freed).  A macro so it works uniformly for
 * group/engine/wq pointer arrays.
 */
#define idxd_free_saved_configs(saved_configs, count)	\
	do {						\
		int i;					\
							\
		for (i = 0; i < (count); i++)		\
			kfree(saved_configs[i]);	\
	} while (0)
829 
/*
 * Free the elements of whichever saved-config arrays are non-NULL.
 * Used on the error paths of idxd_device_config_save(); the arrays
 * themselves are owned (and freed) by the caller.
 */
static void idxd_free_saved(struct idxd_group **saved_groups,
			    struct idxd_engine **saved_engines,
			    struct idxd_wq **saved_wqs,
			    struct idxd_device *idxd)
{
	if (saved_groups)
		idxd_free_saved_configs(saved_groups, idxd->max_groups);
	if (saved_engines)
		idxd_free_saved_configs(saved_engines, idxd->max_engines);
	if (saved_wqs)
		idxd_free_saved_configs(saved_wqs, idxd->max_wqs);
}
842 
843 /*
844  * Save IDXD device configurations including engines, groups, wqs etc.
845  * The saved configurations can be restored when needed.
846  */
847 static int idxd_device_config_save(struct idxd_device *idxd,
848 				   struct idxd_saved_states *idxd_saved)
849 {
850 	struct device *dev = &idxd->pdev->dev;
851 	int i;
852 
853 	memcpy(&idxd_saved->saved_idxd, idxd, sizeof(*idxd));
854 
855 	if (idxd->evl) {
856 		memcpy(&idxd_saved->saved_evl, idxd->evl,
857 		       sizeof(struct idxd_evl));
858 	}
859 
860 	struct idxd_group **saved_groups __free(kfree) =
861 			kcalloc_node(idxd->max_groups,
862 				     sizeof(struct idxd_group *),
863 				     GFP_KERNEL, dev_to_node(dev));
864 	if (!saved_groups)
865 		return -ENOMEM;
866 
867 	for (i = 0; i < idxd->max_groups; i++) {
868 		struct idxd_group *saved_group __free(kfree) =
869 			kzalloc_node(sizeof(*saved_group), GFP_KERNEL,
870 				     dev_to_node(dev));
871 
872 		if (!saved_group) {
873 			/* Free saved groups */
874 			idxd_free_saved(saved_groups, NULL, NULL, idxd);
875 
876 			return -ENOMEM;
877 		}
878 
879 		memcpy(saved_group, idxd->groups[i], sizeof(*saved_group));
880 		saved_groups[i] = no_free_ptr(saved_group);
881 	}
882 
883 	struct idxd_engine **saved_engines =
884 			kcalloc_node(idxd->max_engines,
885 				     sizeof(struct idxd_engine *),
886 				     GFP_KERNEL, dev_to_node(dev));
887 	if (!saved_engines) {
888 		/* Free saved groups */
889 		idxd_free_saved(saved_groups, NULL, NULL, idxd);
890 
891 		return -ENOMEM;
892 	}
893 	for (i = 0; i < idxd->max_engines; i++) {
894 		struct idxd_engine *saved_engine __free(kfree) =
895 				kzalloc_node(sizeof(*saved_engine), GFP_KERNEL,
896 					     dev_to_node(dev));
897 		if (!saved_engine) {
898 			/* Free saved groups and engines */
899 			idxd_free_saved(saved_groups, saved_engines, NULL,
900 					idxd);
901 
902 			return -ENOMEM;
903 		}
904 
905 		memcpy(saved_engine, idxd->engines[i], sizeof(*saved_engine));
906 		saved_engines[i] = no_free_ptr(saved_engine);
907 	}
908 
909 	unsigned long *saved_wq_enable_map __free(bitmap) =
910 			bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL,
911 					   dev_to_node(dev));
912 	if (!saved_wq_enable_map) {
913 		/* Free saved groups and engines */
914 		idxd_free_saved(saved_groups, saved_engines, NULL, idxd);
915 
916 		return -ENOMEM;
917 	}
918 
919 	bitmap_copy(saved_wq_enable_map, idxd->wq_enable_map, idxd->max_wqs);
920 
921 	struct idxd_wq **saved_wqs __free(kfree) =
922 			kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
923 				     GFP_KERNEL, dev_to_node(dev));
924 	if (!saved_wqs) {
925 		/* Free saved groups and engines */
926 		idxd_free_saved(saved_groups, saved_engines, NULL, idxd);
927 
928 		return -ENOMEM;
929 	}
930 
931 	for (i = 0; i < idxd->max_wqs; i++) {
932 		struct idxd_wq *saved_wq __free(kfree) =
933 			kzalloc_node(sizeof(*saved_wq), GFP_KERNEL,
934 				     dev_to_node(dev));
935 		struct idxd_wq *wq;
936 
937 		if (!saved_wq) {
938 			/* Free saved groups, engines, and wqs */
939 			idxd_free_saved(saved_groups, saved_engines, saved_wqs,
940 					idxd);
941 
942 			return -ENOMEM;
943 		}
944 
945 		if (!test_bit(i, saved_wq_enable_map))
946 			continue;
947 
948 		wq = idxd->wqs[i];
949 		mutex_lock(&wq->wq_lock);
950 		memcpy(saved_wq, wq, sizeof(*saved_wq));
951 		saved_wqs[i] = no_free_ptr(saved_wq);
952 		mutex_unlock(&wq->wq_lock);
953 	}
954 
955 	/* Save configurations */
956 	idxd_saved->saved_groups = no_free_ptr(saved_groups);
957 	idxd_saved->saved_engines = no_free_ptr(saved_engines);
958 	idxd_saved->saved_wq_enable_map = no_free_ptr(saved_wq_enable_map);
959 	idxd_saved->saved_wqs = no_free_ptr(saved_wqs);
960 
961 	return 0;
962 }
963 
/*
 * Restore IDXD device configurations including engines, groups, wqs etc
 * that were saved before.  Consumes (frees) all arrays and elements in
 * @idxd_saved as it goes.
 */
static void idxd_device_config_restore(struct idxd_device *idxd,
				       struct idxd_saved_states *idxd_saved)
{
	struct idxd_evl *saved_evl = &idxd_saved->saved_evl;
	int i;

	idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit;

	idxd->evl->size = saved_evl->size;

	/* Restore per-group settings, freeing each snapshot as it is used. */
	for (i = 0; i < idxd->max_groups; i++) {
		struct idxd_group *saved_group, *group;

		saved_group = idxd_saved->saved_groups[i];
		group = idxd->groups[i];

		group->rdbufs_allowed = saved_group->rdbufs_allowed;
		group->rdbufs_reserved = saved_group->rdbufs_reserved;
		group->tc_a = saved_group->tc_a;
		group->tc_b = saved_group->tc_b;
		group->use_rdbuf_limit = saved_group->use_rdbuf_limit;

		kfree(saved_group);
	}
	kfree(idxd_saved->saved_groups);

	/* Restore engine-to-group assignments. */
	for (i = 0; i < idxd->max_engines; i++) {
		struct idxd_engine *saved_engine, *engine;

		saved_engine = idxd_saved->saved_engines[i];
		engine = idxd->engines[i];

		engine->group = saved_engine->group;

		kfree(saved_engine);
	}
	kfree(idxd_saved->saved_engines);

	bitmap_copy(idxd->wq_enable_map, idxd_saved->saved_wq_enable_map,
		    idxd->max_wqs);
	bitmap_free(idxd_saved->saved_wq_enable_map);

	/* Only enabled wqs were saved; restore those under the wq lock. */
	for (i = 0; i < idxd->max_wqs; i++) {
		struct idxd_wq *saved_wq, *wq;
		size_t len;

		if (!test_bit(i, idxd->wq_enable_map))
			continue;

		saved_wq = idxd_saved->saved_wqs[i];
		wq = idxd->wqs[i];

		mutex_lock(&wq->wq_lock);

		wq->group = saved_wq->group;
		wq->flags = saved_wq->flags;
		wq->threshold = saved_wq->threshold;
		wq->size = saved_wq->size;
		wq->priority = saved_wq->priority;
		wq->type = saved_wq->type;
		/*
		 * NOTE(review): strscpy bound comes from the source string;
		 * safe only because saved_wq is a copy of the same struct
		 * type (same destination array size) — confirm.
		 */
		len = strlen(saved_wq->name) + 1;
		strscpy(wq->name, saved_wq->name, len);
		wq->max_xfer_bytes = saved_wq->max_xfer_bytes;
		wq->max_batch_size = saved_wq->max_batch_size;
		wq->enqcmds_retries = saved_wq->enqcmds_retries;
		wq->descs = saved_wq->descs;
		wq->idxd_chan = saved_wq->idxd_chan;
		len = strlen(saved_wq->driver_name) + 1;
		strscpy(wq->driver_name, saved_wq->driver_name, len);

		mutex_unlock(&wq->wq_lock);

		kfree(saved_wq);
	}

	kfree(idxd_saved->saved_wqs);
}
1045 
/*
 * PCI reset_prepare hook (e.g. before FLR): snapshot the device's
 * software configuration into idxd->idxd_saved and save PCI state so
 * idxd_reset_done() can restore both.  On allocation or save failure
 * the device is left with no saved state (only logged).
 */
static void idxd_reset_prepare(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);
	struct device *dev = &idxd->pdev->dev;
	const char *idxd_name;
	int rc;

	idxd_name = dev_name(idxd_confdev(idxd));

	struct idxd_saved_states *idxd_saved __free(kfree) =
			kzalloc_node(sizeof(*idxd_saved), GFP_KERNEL,
				     dev_to_node(&pdev->dev));
	if (!idxd_saved) {
		dev_err(dev, "HALT: no memory\n");

		return;
	}

	/* Save IDXD configurations. */
	rc = idxd_device_config_save(idxd, idxd_saved);
	if (rc < 0) {
		dev_err(dev, "HALT: cannot save %s configs\n", idxd_name);

		return;
	}

	/* Ownership moves to the device; __free no longer applies. */
	idxd->idxd_saved = no_free_ptr(idxd_saved);

	/* Save PCI device state. */
	pci_save_state(idxd->pdev);
}
1077 
1078 static void idxd_reset_done(struct pci_dev *pdev)
1079 {
1080 	struct idxd_device *idxd = pci_get_drvdata(pdev);
1081 	const char *idxd_name;
1082 	struct device *dev;
1083 	int rc, i;
1084 
1085 	if (!idxd->idxd_saved)
1086 		return;
1087 
1088 	dev = &idxd->pdev->dev;
1089 	idxd_name = dev_name(idxd_confdev(idxd));
1090 
1091 	/* Restore PCI device state. */
1092 	pci_restore_state(idxd->pdev);
1093 
1094 	/* Unbind idxd device from driver. */
1095 	idxd_unbind(&idxd_drv.drv, idxd_name);
1096 
1097 	/*
1098 	 * Probe PCI device without allocating or changing
1099 	 * idxd software data which keeps the same as before FLR.
1100 	 */
1101 	idxd_pci_probe_alloc(idxd, NULL, NULL);
1102 
1103 	/* Restore IDXD configurations. */
1104 	idxd_device_config_restore(idxd, idxd->idxd_saved);
1105 
1106 	/* Re-configure IDXD device if allowed. */
1107 	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
1108 		rc = idxd_device_config(idxd);
1109 		if (rc < 0) {
1110 			dev_err(dev, "HALT: %s config fails\n", idxd_name);
1111 			goto out;
1112 		}
1113 	}
1114 
1115 	/* Bind IDXD device to driver. */
1116 	rc = idxd_bind(&idxd_drv.drv, idxd_name);
1117 	if (rc < 0) {
1118 		dev_err(dev, "HALT: binding %s to driver fails\n", idxd_name);
1119 		goto out;
1120 	}
1121 
1122 	/* Bind enabled wq in the IDXD device to driver. */
1123 	for (i = 0; i < idxd->max_wqs; i++) {
1124 		if (test_bit(i, idxd->wq_enable_map)) {
1125 			struct idxd_wq *wq = idxd->wqs[i];
1126 			char wq_name[32];
1127 
1128 			wq->state = IDXD_WQ_DISABLED;
1129 			sprintf(wq_name, "wq%d.%d", idxd->id, wq->id);
1130 			/*
1131 			 * Bind to user driver depending on wq type.
1132 			 *
1133 			 * Currently only support user type WQ. Will support
1134 			 * kernel type WQ in the future.
1135 			 */
1136 			if (wq->type == IDXD_WQT_USER)
1137 				rc = idxd_bind(&idxd_user_drv.drv, wq_name);
1138 			else
1139 				rc = -EINVAL;
1140 			if (rc < 0) {
1141 				clear_bit(i, idxd->wq_enable_map);
1142 				dev_err(dev,
1143 					"HALT: unable to re-enable wq %s\n",
1144 					dev_name(wq_confdev(wq)));
1145 			}
1146 		}
1147 	}
1148 out:
1149 	kfree(idxd->idxd_saved);
1150 }
1151 
/* PCI error handlers: save config before FLR, restore and re-bind after. */
static const struct pci_error_handlers idxd_error_handler = {
	.reset_prepare	= idxd_reset_prepare,
	.reset_done	= idxd_reset_done,
};
1156 
1157 /*
1158  * Probe idxd PCI device.
1159  * If idxd is not given, need to allocate idxd and set up its data.
1160  *
1161  * If idxd is given, idxd was allocated and setup already. Just need to
1162  * configure device without re-allocating and re-configuring idxd data.
1163  * This is useful for recovering from FLR.
1164  */
1165 int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev,
1166 			 const struct pci_device_id *id)
1167 {
1168 	bool alloc_idxd = idxd ? false : true;
1169 	struct idxd_driver_data *data;
1170 	struct device *dev;
1171 	int rc;
1172 
1173 	pdev = idxd ? idxd->pdev : pdev;
1174 	dev = &pdev->dev;
1175 	data = id ? (struct idxd_driver_data *)id->driver_data : NULL;
1176 	rc = pci_enable_device(pdev);
1177 	if (rc)
1178 		return rc;
1179 
1180 	if (alloc_idxd) {
1181 		dev_dbg(dev, "Alloc IDXD context\n");
1182 		idxd = idxd_alloc(pdev, data);
1183 		if (!idxd) {
1184 			rc = -ENOMEM;
1185 			goto err_idxd_alloc;
1186 		}
1187 
1188 		dev_dbg(dev, "Mapping BARs\n");
1189 		idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
1190 		if (!idxd->reg_base) {
1191 			rc = -ENOMEM;
1192 			goto err_iomap;
1193 		}
1194 
1195 		dev_dbg(dev, "Set DMA masks\n");
1196 		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1197 		if (rc)
1198 			goto err;
1199 	}
1200 
1201 	dev_dbg(dev, "Set PCI master\n");
1202 	pci_set_master(pdev);
1203 	pci_set_drvdata(pdev, idxd);
1204 
1205 	if (alloc_idxd) {
1206 		idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
1207 		rc = idxd_probe(idxd);
1208 		if (rc) {
1209 			dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
1210 			goto err;
1211 		}
1212 
1213 		if (data->load_device_defaults) {
1214 			rc = data->load_device_defaults(idxd);
1215 			if (rc)
1216 				dev_warn(dev, "IDXD loading device defaults failed\n");
1217 		}
1218 
1219 		rc = idxd_register_devices(idxd);
1220 		if (rc) {
1221 			dev_err(dev, "IDXD sysfs setup failed\n");
1222 			goto err_dev_register;
1223 		}
1224 
1225 		rc = idxd_device_init_debugfs(idxd);
1226 		if (rc)
1227 			dev_warn(dev, "IDXD debugfs failed to setup\n");
1228 	}
1229 
1230 	if (!alloc_idxd) {
1231 		/* Release interrupts in the IDXD device. */
1232 		idxd_cleanup_interrupts(idxd);
1233 
1234 		/* Re-enable interrupts in the IDXD device. */
1235 		rc = idxd_setup_interrupts(idxd);
1236 		if (rc)
1237 			dev_warn(dev, "IDXD interrupts failed to setup\n");
1238 	}
1239 
1240 	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
1241 		 idxd->hw.version);
1242 
1243 	if (data)
1244 		idxd->user_submission_safe = data->user_submission_safe;
1245 
1246 	return 0;
1247 
1248  err_dev_register:
1249 	idxd_cleanup(idxd);
1250  err:
1251 	pci_iounmap(pdev, idxd->reg_base);
1252  err_iomap:
1253 	idxd_free(idxd);
1254  err_idxd_alloc:
1255 	pci_disable_device(pdev);
1256 	return rc;
1257 }
1258 
1259 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1260 {
1261 	return idxd_pci_probe_alloc(NULL, pdev, id);
1262 }
1263 
1264 void idxd_wqs_quiesce(struct idxd_device *idxd)
1265 {
1266 	struct idxd_wq *wq;
1267 	int i;
1268 
1269 	for (i = 0; i < idxd->max_wqs; i++) {
1270 		wq = idxd->wqs[i];
1271 		if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
1272 			idxd_wq_quiesce(wq);
1273 	}
1274 }
1275 
1276 static void idxd_shutdown(struct pci_dev *pdev)
1277 {
1278 	struct idxd_device *idxd = pci_get_drvdata(pdev);
1279 	struct idxd_irq_entry *irq_entry;
1280 	int rc;
1281 
1282 	rc = idxd_device_disable(idxd);
1283 	if (rc)
1284 		dev_err(&pdev->dev, "Disabling device failed\n");
1285 
1286 	irq_entry = &idxd->ie;
1287 	synchronize_irq(irq_entry->vector);
1288 	idxd_mask_error_interrupts(idxd);
1289 	flush_workqueue(idxd->wq);
1290 }
1291 
/*
 * PCI remove callback: tear down sysfs, sub-driver bindings, interrupts,
 * PASID, and MMIO mappings, then disable the PCI device. Teardown order
 * matters; see the lifetime note on get_device()/put_device() below.
 */
static void idxd_remove(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);

	idxd_unregister_devices(idxd);
	/*
	 * When ->release() is called for the idxd->conf_dev, it frees all the memory related
	 * to the idxd context. The driver still needs those bits in order to do the rest of
	 * the cleanup. However, we do need to unbound the idxd sub-driver. So take a ref
	 * on the device here to hold off the freeing while allowing the idxd sub-driver
	 * to unbind.
	 */
	get_device(idxd_confdev(idxd));
	device_unregister(idxd_confdev(idxd));
	idxd_shutdown(pdev);
	idxd_device_remove_debugfs(idxd);
	perfmon_pmu_remove(idxd);
	idxd_cleanup_interrupts(idxd);
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	pci_iounmap(pdev, idxd->reg_base);
	/* Drop the ref taken above; idxd may be freed after this point. */
	put_device(idxd_confdev(idxd));
	pci_disable_device(pdev);
}
1316 
/* PCI driver glue for DSA/IAX devices matched via idxd_pci_tbl. */
static struct pci_driver idxd_pci_driver = {
	.name		= DRV_NAME,
	.id_table	= idxd_pci_tbl,
	.probe		= idxd_pci_probe,
	.remove		= idxd_remove,
	.shutdown	= idxd_shutdown,
	.err_handler	= &idxd_error_handler,
};
1325 
1326 static int __init idxd_init_module(void)
1327 {
1328 	int err;
1329 
1330 	/*
1331 	 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
1332 	 * enumerating the device. We can not utilize it.
1333 	 */
1334 	if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
1335 		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
1336 		return -ENODEV;
1337 	}
1338 
1339 	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
1340 		pr_warn("Platform does not have ENQCMD(S) support.\n");
1341 	else
1342 		support_enqcmd = true;
1343 
1344 	err = idxd_driver_register(&idxd_drv);
1345 	if (err < 0)
1346 		goto err_idxd_driver_register;
1347 
1348 	err = idxd_driver_register(&idxd_dmaengine_drv);
1349 	if (err < 0)
1350 		goto err_idxd_dmaengine_driver_register;
1351 
1352 	err = idxd_driver_register(&idxd_user_drv);
1353 	if (err < 0)
1354 		goto err_idxd_user_driver_register;
1355 
1356 	err = idxd_cdev_register();
1357 	if (err)
1358 		goto err_cdev_register;
1359 
1360 	err = idxd_init_debugfs();
1361 	if (err)
1362 		goto err_debugfs;
1363 
1364 	err = pci_register_driver(&idxd_pci_driver);
1365 	if (err)
1366 		goto err_pci_register;
1367 
1368 	return 0;
1369 
1370 err_pci_register:
1371 	idxd_remove_debugfs();
1372 err_debugfs:
1373 	idxd_cdev_remove();
1374 err_cdev_register:
1375 	idxd_driver_unregister(&idxd_user_drv);
1376 err_idxd_user_driver_register:
1377 	idxd_driver_unregister(&idxd_dmaengine_drv);
1378 err_idxd_dmaengine_driver_register:
1379 	idxd_driver_unregister(&idxd_drv);
1380 err_idxd_driver_register:
1381 	return err;
1382 }
1383 module_init(idxd_init_module);
1384 
static void __exit idxd_exit_module(void)
{
	/* Unbind sub-drivers first so devices quiesce before PCI removal. */
	idxd_driver_unregister(&idxd_user_drv);
	idxd_driver_unregister(&idxd_dmaengine_drv);
	idxd_driver_unregister(&idxd_drv);
	pci_unregister_driver(&idxd_pci_driver);
	idxd_cdev_remove();
	idxd_remove_debugfs();
}
1394 module_exit(idxd_exit_module);
1395