xref: /linux/drivers/dma/idxd/init.c (revision 93a40a6d7428921897bb7fed5ffb4ce83df05432)
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/aer.h>
#include <linux/fs.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/intel-svm.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/dmaengine.h>
#include "../dmaengine.h"
#include "registers.h"
#include "idxd.h"

MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");

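/*
 * The "sva" module parameter controls whether the driver attempts to
 * enable PASID/SVA support during probe (see idxd_probe()). It can be
 * turned off at load time, e.g. "modprobe idxd sva=0" or "idxd.sva=0"
 * on the kernel command line.
 */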
static bool sva = true;
module_param(sva, bool, 0644);
MODULE_PARM_DESC(sva, "Toggle SVA support on/off");

#define DRV_NAME "idxd"

bool support_enqcmd;
DEFINE_IDA(idxd_ida);

static struct idxd_driver_data idxd_driver_data[] = {
	[IDXD_TYPE_DSA] = {
		.name_prefix = "dsa",
		.type = IDXD_TYPE_DSA,
		.compl_size = sizeof(struct dsa_completion_record),
		.align = 32,
		.dev_type = &dsa_device_type,
	},
	[IDXD_TYPE_IAX] = {
		.name_prefix = "iax",
		.type = IDXD_TYPE_IAX,
		.compl_size = sizeof(struct iax_completion_record),
		.align = 64,
		.dev_type = &iax_device_type,
	},
};

static struct pci_device_id idxd_pci_tbl[] = {
	/* DSA ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },

	/* IAX ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);

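/*
 * Allocate and request all MSI-X vectors for the device. Vector 0 is
 * reserved for the misc/error handler; each remaining vector gets a
 * per work queue completion handler with its own pending llist and
 * work list. On failure everything is unwound in reverse order.
 */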
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct idxd_irq_entry *irq_entry;
	int i, msixcnt;
	int rc = 0;

	msixcnt = pci_msix_vec_count(pdev);
	if (msixcnt < 0) {
		dev_err(dev, "Not MSI-X interrupt capable.\n");
		return -ENOSPC;
	}

	rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
	if (rc != msixcnt) {
		dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
		return -ENOSPC;
	}
	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);

	/*
	 * Allocate one IRQ entry (with its completion lists) per MSI-X
	 * vector. Entry 0 is reserved for device errors and other
	 * non-I/O events; the remaining entries handle work queue
	 * completion interrupts.
	 */
	idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
					 GFP_KERNEL, dev_to_node(dev));
	if (!idxd->irq_entries) {
		rc = -ENOMEM;
		goto err_irq_entries;
	}

	for (i = 0; i < msixcnt; i++) {
		idxd->irq_entries[i].id = i;
		idxd->irq_entries[i].idxd = idxd;
		idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
		spin_lock_init(&idxd->irq_entries[i].list_lock);
	}

	irq_entry = &idxd->irq_entries[0];
	rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler, idxd_misc_thread,
				  0, "idxd-misc", irq_entry);
	if (rc < 0) {
		dev_err(dev, "Failed to allocate misc interrupt.\n");
		goto err_misc_irq;
	}

	dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);

	/* first MSI-X entry is not for wq interrupts */
	idxd->num_wq_irqs = msixcnt - 1;

	for (i = 1; i < msixcnt; i++) {
		irq_entry = &idxd->irq_entries[i];

		init_llist_head(&idxd->irq_entries[i].pending_llist);
		INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
		rc = request_threaded_irq(irq_entry->vector, idxd_irq_handler,
					  idxd_wq_thread, 0, "idxd-portal", irq_entry);
		if (rc < 0) {
			dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
			goto err_wq_irqs;
		}
		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
	}

	idxd_unmask_error_interrupts(idxd);
	idxd_msix_perm_setup(idxd);
	return 0;

 err_wq_irqs:
	while (--i >= 0) {
		irq_entry = &idxd->irq_entries[i];
		free_irq(irq_entry->vector, irq_entry);
	}
 err_misc_irq:
	/* Disable error interrupt generation */
	idxd_mask_error_interrupts(idxd);
 err_irq_entries:
	pci_free_irq_vectors(pdev);
	dev_err(dev, "No usable interrupts\n");
	return rc;
}

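/*
 * Allocate one idxd_wq per hardware work queue reported in the WQ
 * capability register and set it up as a child device of the idxd
 * conf_dev on the dsa bus, including a software shadow of its WQCFG
 * registers.
 */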
static int idxd_setup_wqs(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_wq *wq;
	int i, rc;

	idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
				 GFP_KERNEL, dev_to_node(dev));
	if (!idxd->wqs)
		return -ENOMEM;

	for (i = 0; i < idxd->max_wqs; i++) {
		wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
		if (!wq) {
			rc = -ENOMEM;
			goto err;
		}

		wq->id = i;
		wq->idxd = idxd;
		device_initialize(&wq->conf_dev);
		wq->conf_dev.parent = &idxd->conf_dev;
		wq->conf_dev.bus = &dsa_bus_type;
		wq->conf_dev.type = &idxd_wq_device_type;
		rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
		if (rc < 0) {
			put_device(&wq->conf_dev);
			goto err;
		}

		mutex_init(&wq->wq_lock);
		init_waitqueue_head(&wq->err_queue);
		init_completion(&wq->wq_dead);
		wq->max_xfer_bytes = idxd->max_xfer_bytes;
		wq->max_batch_size = idxd->max_batch_size;
		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
		if (!wq->wqcfg) {
			put_device(&wq->conf_dev);
			rc = -ENOMEM;
			goto err;
		}
		idxd->wqs[i] = wq;
	}

	return 0;

 err:
	while (--i >= 0)
		put_device(&idxd->wqs[i]->conf_dev);
	return rc;
}

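/*
 * Allocate one idxd_engine per hardware engine and initialize it as a
 * child device of the idxd conf_dev so it can later be exposed through
 * sysfs for configuration.
 */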
static int idxd_setup_engines(struct idxd_device *idxd)
{
	struct idxd_engine *engine;
	struct device *dev = &idxd->pdev->dev;
	int i, rc;

	idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
				     GFP_KERNEL, dev_to_node(dev));
	if (!idxd->engines)
		return -ENOMEM;

	for (i = 0; i < idxd->max_engines; i++) {
		engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
		if (!engine) {
			rc = -ENOMEM;
			goto err;
		}

		engine->id = i;
		engine->idxd = idxd;
		device_initialize(&engine->conf_dev);
		engine->conf_dev.parent = &idxd->conf_dev;
		engine->conf_dev.type = &idxd_engine_device_type;
		rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
		if (rc < 0) {
			put_device(&engine->conf_dev);
			goto err;
		}

		idxd->engines[i] = engine;
	}

	return 0;

 err:
	while (--i >= 0)
		put_device(&idxd->engines[i]->conf_dev);
	return rc;
}

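/*
 * Allocate one idxd_group per hardware group. The traffic class fields
 * are left at -1 (unassigned) until configured through sysfs.
 */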
static int idxd_setup_groups(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_group *group;
	int i, rc;

	idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
				    GFP_KERNEL, dev_to_node(dev));
	if (!idxd->groups)
		return -ENOMEM;

	for (i = 0; i < idxd->max_groups; i++) {
		group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
		if (!group) {
			rc = -ENOMEM;
			goto err;
		}

		group->id = i;
		group->idxd = idxd;
		device_initialize(&group->conf_dev);
		group->conf_dev.parent = &idxd->conf_dev;
		group->conf_dev.bus = &dsa_bus_type;
		group->conf_dev.type = &idxd_group_device_type;
		rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
		if (rc < 0) {
			put_device(&group->conf_dev);
			goto err;
		}

		idxd->groups[i] = group;
		group->tc_a = -1;
		group->tc_b = -1;
	}

	return 0;

 err:
	while (--i >= 0)
		put_device(&idxd->groups[i]->conf_dev);
	return rc;
}

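/*
 * Set up all software state derived from the capability registers:
 * work queues, engines, groups, and a kernel workqueue named after the
 * device. On failure, device references are dropped in reverse order.
 */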
static int idxd_setup_internals(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int rc, i;

	init_waitqueue_head(&idxd->cmd_waitq);

	rc = idxd_setup_wqs(idxd);
	if (rc < 0)
		return rc;

	rc = idxd_setup_engines(idxd);
	if (rc < 0)
		goto err_engine;

	rc = idxd_setup_groups(idxd);
	if (rc < 0)
		goto err_group;

	idxd->wq = create_workqueue(dev_name(dev));
	if (!idxd->wq) {
		rc = -ENOMEM;
		goto err_wkq_create;
	}

	return 0;

 err_wkq_create:
	for (i = 0; i < idxd->max_groups; i++)
		put_device(&idxd->groups[i]->conf_dev);
 err_group:
	for (i = 0; i < idxd->max_engines; i++)
		put_device(&idxd->engines[i]->conf_dev);
 err_engine:
	for (i = 0; i < idxd->max_wqs; i++)
		put_device(&idxd->wqs[i]->conf_dev);
	return rc;
}

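/*
 * Read the table offset register and convert each field into a byte
 * offset (multiples of IDXD_TABLE_MULT) for the GRPCFG, WQCFG, MSI-X
 * permission, and perfmon tables.
 */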
static void idxd_read_table_offsets(struct idxd_device *idxd)
{
	union offsets_reg offsets;
	struct device *dev = &idxd->pdev->dev;

	offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
	offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64));
	idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
	idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset);
	idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset);
	idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
}

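/*
 * Cache the general, group, engine, work queue, and operation
 * capability registers and derive the driver limits (max transfer and
 * batch sizes, number of groups, tokens, engines, and work queues)
 * from them.
 */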
static void idxd_read_caps(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int i;

	/* reading generic capabilities */
	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
	idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
	dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
	if (idxd->hw.gen_cap.config_en)
		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);

	/* reading group capabilities */
	idxd->hw.group_cap.bits =
		ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
	dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
	idxd->max_groups = idxd->hw.group_cap.num_groups;
	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
	idxd->max_tokens = idxd->hw.group_cap.total_tokens;
	dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
	idxd->nr_tokens = idxd->max_tokens;

	/* read engine capabilities */
	idxd->hw.engine_cap.bits =
		ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
	dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
	idxd->max_engines = idxd->hw.engine_cap.num_engines;
	dev_dbg(dev, "max engines: %u\n", idxd->max_engines);

	/* read workqueue capabilities */
	idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
	dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
	idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
	idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
	dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);

	/* reading operation capabilities */
	for (i = 0; i < 4; i++) {
		idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
				IDXD_OPCAP_OFFSET + i * sizeof(u64));
		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
	}
}

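/*
 * Allocate the per-device context, assign a unique device id from
 * idxd_ida, and initialize the conf_dev that represents the device on
 * the dsa bus (named e.g. "dsa0" or "iax0").
 */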
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
	struct device *dev = &pdev->dev;
	struct idxd_device *idxd;
	int rc;

	idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
	if (!idxd)
		return NULL;

	idxd->pdev = pdev;
	idxd->data = data;
	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
	if (idxd->id < 0) {
		/* conf_dev is not initialized yet, so a plain kfree is enough */
		kfree(idxd);
		return NULL;
	}

	device_initialize(&idxd->conf_dev);
	idxd->conf_dev.parent = dev;
	idxd->conf_dev.bus = &dsa_bus_type;
	idxd->conf_dev.type = idxd->data->dev_type;
	rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
	if (rc < 0) {
		put_device(&idxd->conf_dev);
		return NULL;
	}

	spin_lock_init(&idxd->dev_lock);

	return idxd;
}

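/*
 * Bind the device to a supervisor (in-kernel) PASID via SVA so that
 * kernel-submitted descriptors can use the system PASID for address
 * translation.
 */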
static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
	int flags;
	unsigned int pasid;
	struct iommu_sva *sva;

	flags = SVM_FLAG_SUPERVISOR_MODE;

	sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
	if (IS_ERR(sva)) {
		dev_warn(&idxd->pdev->dev,
			 "iommu sva bind failed: %ld\n", PTR_ERR(sva));
		return PTR_ERR(sva);
	}

	pasid = iommu_sva_get_pasid(sva);
	if (pasid == IOMMU_PASID_INVALID) {
		iommu_sva_unbind_device(sva);
		return -ENODEV;
	}

	idxd->sva = sva;
	idxd->pasid = pasid;
	dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
	return 0;
}

static void idxd_disable_system_pasid(struct idxd_device *idxd)
{
	iommu_sva_unbind_device(idxd->sva);
	idxd->sva = NULL;
}

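/*
 * Device-level probe: reset the device, optionally enable the system
 * PASID, read capabilities and table offsets, set up software state
 * and interrupts, and record the char dev major number.
 */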
static int idxd_probe(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	int rc;

	dev_dbg(dev, "%s entered and resetting device\n", __func__);
	rc = idxd_device_init_reset(idxd);
	if (rc < 0)
		return rc;

	dev_dbg(dev, "IDXD reset complete\n");

	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
		rc = idxd_enable_system_pasid(idxd);
		if (rc < 0)
			dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
		else
			set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
	} else if (!sva) {
		dev_warn(dev, "User forced SVA off via module param.\n");
	}

	idxd_read_caps(idxd);
	idxd_read_table_offsets(idxd);

	rc = idxd_setup_internals(idxd);
	if (rc)
		goto err;

	rc = idxd_setup_interrupts(idxd);
	if (rc)
		goto err;

	dev_dbg(dev, "IDXD interrupt setup complete.\n");

	idxd->major = idxd_cdev_get_major(idxd);

	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
	return 0;

 err:
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	return rc;
}

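/*
 * PCI probe: enable the PCI device, map the MMIO BAR (IDXD_MMIO_BAR),
 * set the DMA masks, run the device-level probe, and register the
 * conf_dev hierarchy with sysfs. The device is left in CONF_READY
 * state for user space to configure and enable.
 */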
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct device *dev = &pdev->dev;
	struct idxd_device *idxd;
	struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
	int rc;

	rc = pci_enable_device(pdev);
	if (rc)
		return rc;

	dev_dbg(dev, "Alloc IDXD context\n");
	idxd = idxd_alloc(pdev, data);
	if (!idxd) {
		rc = -ENOMEM;
		goto err_idxd_alloc;
	}

	dev_dbg(dev, "Mapping BARs\n");
	idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
	if (!idxd->reg_base) {
		rc = -ENOMEM;
		goto err_iomap;
	}

	dev_dbg(dev, "Set DMA masks\n");
	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (rc)
		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
	if (rc)
		goto err;

	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (rc)
		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
	if (rc)
		goto err;

	dev_dbg(dev, "Set PCI master\n");
	pci_set_master(pdev);
	pci_set_drvdata(pdev, idxd);

	idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
	rc = idxd_probe(idxd);
	if (rc) {
		dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
		goto err;
	}

	rc = idxd_register_devices(idxd);
	if (rc) {
		dev_err(dev, "IDXD sysfs setup failed\n");
		goto err;
	}

	idxd->state = IDXD_DEV_CONF_READY;

	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
		 idxd->hw.version);

	return 0;

 err:
	pci_iounmap(pdev, idxd->reg_base);
 err_iomap:
	put_device(&idxd->conf_dev);
 err_idxd_alloc:
	pci_disable_device(pdev);
	return rc;
}

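/*
 * The two flush helpers below abort any descriptors still sitting on
 * an interrupt entry's pending llist or work list; they are used from
 * idxd_shutdown() after the interrupt has been freed.
 */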
static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
{
	struct idxd_desc *desc, *itr;
	struct llist_node *head;

	head = llist_del_all(&ie->pending_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(desc, itr, head, llnode) {
		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
		idxd_free_desc(desc->wq, desc);
	}
}

static void idxd_flush_work_list(struct idxd_irq_entry *ie)
{
	struct idxd_desc *desc, *iter;

	list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
		list_del(&desc->list);
		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
		idxd_free_desc(desc->wq, desc);
	}
}

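/*
 * Shutdown: disable the device, mask interrupts, free all MSI-X
 * vectors, abort outstanding descriptors, and release the MMIO and PCI
 * resources.
 */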
static void idxd_shutdown(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);
	int rc, i;
	struct idxd_irq_entry *irq_entry;
	int msixcnt = pci_msix_vec_count(pdev);

	rc = idxd_device_disable(idxd);
	if (rc)
		dev_err(&pdev->dev, "Disabling device failed\n");

	dev_dbg(&pdev->dev, "%s called\n", __func__);
	idxd_mask_msix_vectors(idxd);
	idxd_mask_error_interrupts(idxd);

	for (i = 0; i < msixcnt; i++) {
		irq_entry = &idxd->irq_entries[i];
		synchronize_irq(irq_entry->vector);
		free_irq(irq_entry->vector, irq_entry);
		if (i == 0)
			continue;
		idxd_flush_pending_llist(irq_entry);
		idxd_flush_work_list(irq_entry);
	}

	idxd_msix_perm_clear(idxd);
	pci_free_irq_vectors(pdev);
	pci_iounmap(pdev, idxd->reg_base);
	pci_disable_device(pdev);
	destroy_workqueue(idxd->wq);
}

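/* Remove: shut the device down, drop the system PASID, and unregister the sysfs devices. */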
static void idxd_remove(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);

	dev_dbg(&pdev->dev, "%s called\n", __func__);
	idxd_shutdown(pdev);
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	idxd_unregister_devices(idxd);
}

static struct pci_driver idxd_pci_driver = {
	.name		= DRV_NAME,
	.id_table	= idxd_pci_tbl,
	.probe		= idxd_pci_probe,
	.remove		= idxd_remove,
	.shutdown	= idxd_shutdown,
};

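/*
 * Module init: check required CPU features, then register the dsa bus
 * type, the idxd device driver, the char dev region, and finally the
 * PCI driver, unwinding in reverse order on failure.
 */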
static int __init idxd_init_module(void)
{
	int err;

	/*
	 * MOVDIR64B is required to submit descriptors to the device, so
	 * there is no point in enumerating it on CPUs without that
	 * instruction. ENQCMD(S) is checked separately below and is only
	 * warned about if missing.
	 */
	if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) {
		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
		return -ENODEV;
	}

	if (!boot_cpu_has(X86_FEATURE_ENQCMD))
		pr_warn("Platform does not have ENQCMD(S) support.\n");
	else
		support_enqcmd = true;

	err = idxd_register_bus_type();
	if (err < 0)
		return err;

	err = idxd_register_driver();
	if (err < 0)
		goto err_idxd_driver_register;

	err = idxd_cdev_register();
	if (err)
		goto err_cdev_register;

	err = pci_register_driver(&idxd_pci_driver);
	if (err)
		goto err_pci_register;

	return 0;

err_pci_register:
	idxd_cdev_remove();
err_cdev_register:
	idxd_unregister_driver();
err_idxd_driver_register:
	idxd_unregister_bus_type();
	return err;
}
module_init(idxd_init_module);

static void __exit idxd_exit_module(void)
{
	pci_unregister_driver(&idxd_pci_driver);
	idxd_cdev_remove();
	idxd_unregister_bus_type();
}
module_exit(idxd_exit_module);