xref: /linux/drivers/s390/crypto/vfio_ap_ops.c (revision 6beeaf48db6c548fcfc2ad32739d33af2fef3a5b)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Adjunct processor matrix VFIO device driver callbacks.
4  *
5  * Copyright IBM Corp. 2018
6  *
7  * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8  *	      Halil Pasic <pasic@linux.ibm.com>
9  *	      Pierre Morel <pmorel@linux.ibm.com>
10  */
11 #include <linux/string.h>
12 #include <linux/vfio.h>
13 #include <linux/device.h>
14 #include <linux/list.h>
15 #include <linux/ctype.h>
16 #include <linux/bitops.h>
17 #include <linux/kvm_host.h>
18 #include <linux/module.h>
19 #include <asm/kvm.h>
20 #include <asm/zcrypt.h>
21 
22 #include "vfio_ap_private.h"
23 
/* Name given to the mediated device type's attribute group */
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
/* Human-readable device name printed by the "name" type attribute */
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"

/* Forward declarations of symbols that are referenced before they are defined */
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
30 
31 static int match_apqn(struct device *dev, const void *data)
32 {
33 	struct vfio_ap_queue *q = dev_get_drvdata(dev);
34 
35 	return (q->apqn == *(int *)(data)) ? 1 : 0;
36 }
37 
38 /**
39  * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
40  * @matrix_mdev: the associated mediated matrix
41  * @apqn: The queue APQN
42  *
43  * Retrieve a queue with a specific APQN from the list of the
44  * devices of the vfio_ap_drv.
45  * Verify that the APID and the APQI are set in the matrix.
46  *
47  * Return: the pointer to the associated vfio_ap_queue
48  */
49 static struct vfio_ap_queue *vfio_ap_get_queue(
50 					struct ap_matrix_mdev *matrix_mdev,
51 					int apqn)
52 {
53 	struct vfio_ap_queue *q;
54 
55 	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
56 		return NULL;
57 	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
58 		return NULL;
59 
60 	q = vfio_ap_find_queue(apqn);
61 	if (q)
62 		q->matrix_mdev = matrix_mdev;
63 
64 	return q;
65 }
66 
67 /**
68  * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
69  * @apqn: The AP Queue number
70  *
71  * Checks the IRQ bit for the status of this APQN using ap_tapq.
72  * Returns if the ap_tapq function succeeded and the bit is clear.
73  * Returns if ap_tapq function failed with invalid, deconfigured or
74  * checkstopped AP.
75  * Otherwise retries up to 5 times after waiting 20ms.
76  */
77 static void vfio_ap_wait_for_irqclear(int apqn)
78 {
79 	struct ap_queue_status status;
80 	int retry = 5;
81 
82 	do {
83 		status = ap_tapq(apqn, NULL);
84 		switch (status.response_code) {
85 		case AP_RESPONSE_NORMAL:
86 		case AP_RESPONSE_RESET_IN_PROGRESS:
87 			if (!status.irq_enabled)
88 				return;
89 			fallthrough;
90 		case AP_RESPONSE_BUSY:
91 			msleep(20);
92 			break;
93 		case AP_RESPONSE_Q_NOT_AVAIL:
94 		case AP_RESPONSE_DECONFIGURED:
95 		case AP_RESPONSE_CHECKSTOPPED:
96 		default:
97 			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
98 				  status.response_code, apqn);
99 			return;
100 		}
101 	} while (--retry);
102 
103 	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
104 		  __func__, status.response_code, apqn);
105 }
106 
107 /**
108  * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
109  * @q: The vfio_ap_queue
110  *
111  * Unregisters the ISC in the GIB when the saved ISC not invalid.
112  * Unpins the guest's page holding the NIB when it exists.
113  * Resets the saved_pfn and saved_isc to invalid values.
114  */
115 static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
116 {
117 	if (!q)
118 		return;
119 	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
120 	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
121 		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
122 		q->saved_isc = VFIO_AP_ISC_INVALID;
123 	}
124 	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
125 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
126 				 &q->saved_pfn, 1);
127 		q->saved_pfn = 0;
128 	}
129 }
130 
131 /**
132  * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
133  * @q: The vfio_ap_queue
134  *
135  * Uses ap_aqic to disable the interruption and in case of success, reset
136  * in progress or IRQ disable command already proceeded: calls
137  * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
138  * and calls vfio_ap_free_aqic_resources() to free the resources associated
139  * with the AP interrupt handling.
140  *
141  * In the case the AP is busy, or a reset is in progress,
142  * retries after 20ms, up to 5 times.
143  *
144  * Returns if ap_aqic function failed with invalid, deconfigured or
145  * checkstopped AP.
146  *
147  * Return: &struct ap_queue_status
148  */
149 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
150 {
151 	struct ap_qirq_ctrl aqic_gisa = {};
152 	struct ap_queue_status status;
153 	int retries = 5;
154 
155 	do {
156 		status = ap_aqic(q->apqn, aqic_gisa, NULL);
157 		switch (status.response_code) {
158 		case AP_RESPONSE_OTHERWISE_CHANGED:
159 		case AP_RESPONSE_NORMAL:
160 			vfio_ap_wait_for_irqclear(q->apqn);
161 			goto end_free;
162 		case AP_RESPONSE_RESET_IN_PROGRESS:
163 		case AP_RESPONSE_BUSY:
164 			msleep(20);
165 			break;
166 		case AP_RESPONSE_Q_NOT_AVAIL:
167 		case AP_RESPONSE_DECONFIGURED:
168 		case AP_RESPONSE_CHECKSTOPPED:
169 		case AP_RESPONSE_INVALID_ADDRESS:
170 		default:
171 			/* All cases in default means AP not operational */
172 			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
173 				  status.response_code);
174 			goto end_free;
175 		}
176 	} while (retries--);
177 
178 	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
179 		  status.response_code);
180 end_free:
181 	vfio_ap_free_aqic_resources(q);
182 	q->matrix_mdev = NULL;
183 	return status;
184 }
185 
186 /**
187  * vfio_ap_irq_enable - Enable Interruption for a APQN
188  *
189  * @q:	 the vfio_ap_queue holding AQIC parameters
190  *
191  * Pin the NIB saved in *q
192  * Register the guest ISC to GIB interface and retrieve the
193  * host ISC to issue the host side PQAP/AQIC
194  *
195  * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
196  * vfio_pin_pages failed.
197  *
198  * Otherwise return the ap_queue_status returned by the ap_aqic(),
199  * all retry handling will be done by the guest.
200  *
201  * Return: &struct ap_queue_status
202  */
203 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
204 						 int isc,
205 						 unsigned long nib)
206 {
207 	struct ap_qirq_ctrl aqic_gisa = {};
208 	struct ap_queue_status status = {};
209 	struct kvm_s390_gisa *gisa;
210 	struct kvm *kvm;
211 	unsigned long h_nib, g_pfn, h_pfn;
212 	int ret;
213 
214 	g_pfn = nib >> PAGE_SHIFT;
215 	ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
216 			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
217 	switch (ret) {
218 	case 1:
219 		break;
220 	default:
221 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
222 		return status;
223 	}
224 
225 	kvm = q->matrix_mdev->kvm;
226 	gisa = kvm->arch.gisa_int.origin;
227 
228 	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
229 	aqic_gisa.gisc = isc;
230 	aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
231 	aqic_gisa.ir = 1;
232 	aqic_gisa.gisa = (uint64_t)gisa >> 4;
233 
234 	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
235 	switch (status.response_code) {
236 	case AP_RESPONSE_NORMAL:
237 		/* See if we did clear older IRQ configuration */
238 		vfio_ap_free_aqic_resources(q);
239 		q->saved_pfn = g_pfn;
240 		q->saved_isc = isc;
241 		break;
242 	case AP_RESPONSE_OTHERWISE_CHANGED:
243 		/* We could not modify IRQ setings: clear new configuration */
244 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
245 		kvm_s390_gisc_unregister(kvm, isc);
246 		break;
247 	default:
248 		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
249 			status.response_code);
250 		vfio_ap_irq_disable(q);
251 		break;
252 	}
253 
254 	return status;
255 }
256 
257 /**
258  * handle_pqap - PQAP instruction callback
259  *
260  * @vcpu: The vcpu on which we received the PQAP instruction
261  *
262  * Get the general register contents to initialize internal variables.
263  * REG[0]: APQN
264  * REG[1]: IR and ISC
265  * REG[2]: NIB
266  *
267  * Response.status may be set to following Response Code:
268  * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
269  * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
270  * - AP_RESPONSE_NORMAL (0) : in case of successs
271  *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
272  * We take the matrix_dev lock to ensure serialization on queues and
273  * mediated device access.
274  *
275  * Return: 0 if we could handle the request inside KVM.
276  * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
277  */
278 static int handle_pqap(struct kvm_vcpu *vcpu)
279 {
280 	uint64_t status;
281 	uint16_t apqn;
282 	struct vfio_ap_queue *q;
283 	struct ap_queue_status qstatus = {
284 			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
285 	struct ap_matrix_mdev *matrix_mdev;
286 
287 	/* If we do not use the AIV facility just go to userland */
288 	if (!(vcpu->arch.sie_block->eca & ECA_AIV))
289 		return -EOPNOTSUPP;
290 
291 	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
292 	mutex_lock(&matrix_dev->lock);
293 
294 	if (!vcpu->kvm->arch.crypto.pqap_hook)
295 		goto out_unlock;
296 	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
297 				   struct ap_matrix_mdev, pqap_hook);
298 
299 	/* If the there is no guest using the mdev, there is nothing to do */
300 	if (!matrix_mdev->kvm)
301 		goto out_unlock;
302 
303 	q = vfio_ap_get_queue(matrix_mdev, apqn);
304 	if (!q)
305 		goto out_unlock;
306 
307 	status = vcpu->run->s.regs.gprs[1];
308 
309 	/* If IR bit(16) is set we enable the interrupt */
310 	if ((status >> (63 - 16)) & 0x01)
311 		qstatus = vfio_ap_irq_enable(q, status & 0x07,
312 					     vcpu->run->s.regs.gprs[2]);
313 	else
314 		qstatus = vfio_ap_irq_disable(q);
315 
316 out_unlock:
317 	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
318 	vcpu->run->s.regs.gprs[1] >>= 32;
319 	mutex_unlock(&matrix_dev->lock);
320 	return 0;
321 }
322 
323 static void vfio_ap_matrix_init(struct ap_config_info *info,
324 				struct ap_matrix *matrix)
325 {
326 	matrix->apm_max = info->apxa ? info->Na : 63;
327 	matrix->aqm_max = info->apxa ? info->Nd : 15;
328 	matrix->adm_max = info->apxa ? info->Nd : 15;
329 }
330 
331 static int vfio_ap_mdev_probe(struct mdev_device *mdev)
332 {
333 	struct ap_matrix_mdev *matrix_mdev;
334 	int ret;
335 
336 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
337 		return -EPERM;
338 
339 	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
340 	if (!matrix_mdev) {
341 		ret = -ENOMEM;
342 		goto err_dec_available;
343 	}
344 	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
345 			    &vfio_ap_matrix_dev_ops);
346 
347 	matrix_mdev->mdev = mdev;
348 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
349 	matrix_mdev->pqap_hook = handle_pqap;
350 	mutex_lock(&matrix_dev->lock);
351 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
352 	mutex_unlock(&matrix_dev->lock);
353 
354 	ret = vfio_register_group_dev(&matrix_mdev->vdev);
355 	if (ret)
356 		goto err_list;
357 	dev_set_drvdata(&mdev->dev, matrix_mdev);
358 	return 0;
359 
360 err_list:
361 	mutex_lock(&matrix_dev->lock);
362 	list_del(&matrix_mdev->node);
363 	mutex_unlock(&matrix_dev->lock);
364 	kfree(matrix_mdev);
365 err_dec_available:
366 	atomic_inc(&matrix_dev->available_instances);
367 	return ret;
368 }
369 
370 static void vfio_ap_mdev_remove(struct mdev_device *mdev)
371 {
372 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
373 
374 	vfio_unregister_group_dev(&matrix_mdev->vdev);
375 
376 	mutex_lock(&matrix_dev->lock);
377 	vfio_ap_mdev_reset_queues(matrix_mdev);
378 	list_del(&matrix_mdev->node);
379 	kfree(matrix_mdev);
380 	atomic_inc(&matrix_dev->available_instances);
381 	mutex_unlock(&matrix_dev->lock);
382 }
383 
384 static ssize_t name_show(struct mdev_type *mtype,
385 			 struct mdev_type_attribute *attr, char *buf)
386 {
387 	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
388 }
389 
390 static MDEV_TYPE_ATTR_RO(name);
391 
392 static ssize_t available_instances_show(struct mdev_type *mtype,
393 					struct mdev_type_attribute *attr,
394 					char *buf)
395 {
396 	return sprintf(buf, "%d\n",
397 		       atomic_read(&matrix_dev->available_instances));
398 }
399 
400 static MDEV_TYPE_ATTR_RO(available_instances);
401 
402 static ssize_t device_api_show(struct mdev_type *mtype,
403 			       struct mdev_type_attribute *attr, char *buf)
404 {
405 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
406 }
407 
408 static MDEV_TYPE_ATTR_RO(device_api);
409 
/* Read-only attributes of the mediated device type, NULL terminated */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* Attribute group of the "passthrough" mediated device type */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* All mediated device type groups of this driver, NULL terminated */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
426 
/*
 * Search criteria and result passed to vfio_ap_has_queue() while iterating
 * over the AP queue devices.
 */
struct vfio_ap_queue_reserved {
	unsigned long *apid;	/* APID to look for, or NULL to match any */
	unsigned long *apqi;	/* APQI to look for, or NULL to match any */
	bool reserved;		/* set to true once a matching queue is seen */
};
432 
433 /**
434  * vfio_ap_has_queue - determines if the AP queue containing the target in @data
435  *
436  * @dev: an AP queue device
437  * @data: a struct vfio_ap_queue_reserved reference
438  *
439  * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
440  * apid or apqi specified in @data:
441  *
442  * - If @data contains both an apid and apqi value, then @data will be flagged
443  *   as reserved if the APID and APQI fields for the AP queue device matches
444  *
445  * - If @data contains only an apid value, @data will be flagged as
446  *   reserved if the APID field in the AP queue device matches
447  *
448  * - If @data contains only an apqi value, @data will be flagged as
449  *   reserved if the APQI field in the AP queue device matches
450  *
451  * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
452  * @data does not contain either an apid or apqi.
453  */
454 static int vfio_ap_has_queue(struct device *dev, void *data)
455 {
456 	struct vfio_ap_queue_reserved *qres = data;
457 	struct ap_queue *ap_queue = to_ap_queue(dev);
458 	ap_qid_t qid;
459 	unsigned long id;
460 
461 	if (qres->apid && qres->apqi) {
462 		qid = AP_MKQID(*qres->apid, *qres->apqi);
463 		if (qid == ap_queue->qid)
464 			qres->reserved = true;
465 	} else if (qres->apid && !qres->apqi) {
466 		id = AP_QID_CARD(ap_queue->qid);
467 		if (id == *qres->apid)
468 			qres->reserved = true;
469 	} else if (!qres->apid && qres->apqi) {
470 		id = AP_QID_QUEUE(ap_queue->qid);
471 		if (id == *qres->apqi)
472 			qres->reserved = true;
473 	} else {
474 		return -EINVAL;
475 	}
476 
477 	return 0;
478 }
479 
480 /**
481  * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
482  * @apid or @aqpi is reserved
483  *
484  * @apid: an AP adapter ID
485  * @apqi: an AP queue index
486  *
487  * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
488  * driver according to the following rules:
489  *
490  * - If both @apid and @apqi are not NULL, then there must be an AP queue
491  *   device bound to the vfio_ap driver with the APQN identified by @apid and
492  *   @apqi
493  *
494  * - If only @apid is not NULL, then there must be an AP queue device bound
495  *   to the vfio_ap driver with an APQN containing @apid
496  *
497  * - If only @apqi is not NULL, then there must be an AP queue device bound
498  *   to the vfio_ap driver with an APQN containing @apqi
499  *
500  * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
501  */
502 static int vfio_ap_verify_queue_reserved(unsigned long *apid,
503 					 unsigned long *apqi)
504 {
505 	int ret;
506 	struct vfio_ap_queue_reserved qres;
507 
508 	qres.apid = apid;
509 	qres.apqi = apqi;
510 	qres.reserved = false;
511 
512 	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
513 				     &qres, vfio_ap_has_queue);
514 	if (ret)
515 		return ret;
516 
517 	if (qres.reserved)
518 		return 0;
519 
520 	return -EADDRNOTAVAIL;
521 }
522 
523 static int
524 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
525 					     unsigned long apid)
526 {
527 	int ret;
528 	unsigned long apqi;
529 	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
530 
531 	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
532 		return vfio_ap_verify_queue_reserved(&apid, NULL);
533 
534 	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
535 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
536 		if (ret)
537 			return ret;
538 	}
539 
540 	return 0;
541 }
542 
543 /**
544  * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
545  *
546  * @matrix_mdev: the mediated matrix device
547  *
548  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
549  * and AP queue indexes comprising the AP matrix are not configured for another
550  * mediated device. AP queue sharing is not allowed.
551  *
552  * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
553  */
554 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
555 {
556 	struct ap_matrix_mdev *lstdev;
557 	DECLARE_BITMAP(apm, AP_DEVICES);
558 	DECLARE_BITMAP(aqm, AP_DOMAINS);
559 
560 	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
561 		if (matrix_mdev == lstdev)
562 			continue;
563 
564 		memset(apm, 0, sizeof(apm));
565 		memset(aqm, 0, sizeof(aqm));
566 
567 		/*
568 		 * We work on full longs, as we can only exclude the leftover
569 		 * bits in non-inverse order. The leftover is all zeros.
570 		 */
571 		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
572 				lstdev->matrix.apm, AP_DEVICES))
573 			continue;
574 
575 		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
576 				lstdev->matrix.aqm, AP_DOMAINS))
577 			continue;
578 
579 		return -EADDRINUSE;
580 	}
581 
582 	return 0;
583 }
584 
585 /**
586  * assign_adapter_store - parses the APID from @buf and sets the
587  * corresponding bit in the mediated matrix device's APM
588  *
589  * @dev:	the matrix device
590  * @attr:	the mediated matrix device's assign_adapter attribute
591  * @buf:	a buffer containing the AP adapter number (APID) to
592  *		be assigned
593  * @count:	the number of bytes in @buf
594  *
595  * Return: the number of bytes processed if the APID is valid; otherwise,
596  * returns one of the following errors:
597  *
598  *	1. -EINVAL
599  *	   The APID is not a valid number
600  *
601  *	2. -ENODEV
602  *	   The APID exceeds the maximum value configured for the system
603  *
604  *	3. -EADDRNOTAVAIL
605  *	   An APQN derived from the cross product of the APID being assigned
606  *	   and the APQIs previously assigned is not bound to the vfio_ap device
607  *	   driver; or, if no APQIs have yet been assigned, the APID is not
608  *	   contained in an APQN bound to the vfio_ap device driver.
609  *
610  *	4. -EADDRINUSE
611  *	   An APQN derived from the cross product of the APID being assigned
612  *	   and the APQIs previously assigned is being used by another mediated
613  *	   matrix device
614  */
615 static ssize_t assign_adapter_store(struct device *dev,
616 				    struct device_attribute *attr,
617 				    const char *buf, size_t count)
618 {
619 	int ret;
620 	unsigned long apid;
621 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
622 
623 	mutex_lock(&matrix_dev->lock);
624 
625 	/* If the KVM guest is running, disallow assignment of adapter */
626 	if (matrix_mdev->kvm) {
627 		ret = -EBUSY;
628 		goto done;
629 	}
630 
631 	ret = kstrtoul(buf, 0, &apid);
632 	if (ret)
633 		goto done;
634 
635 	if (apid > matrix_mdev->matrix.apm_max) {
636 		ret = -ENODEV;
637 		goto done;
638 	}
639 
640 	/*
641 	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
642 	 * number (APID). The bits in the mask, from most significant to least
643 	 * significant bit, correspond to APIDs 0-255.
644 	 */
645 	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
646 	if (ret)
647 		goto done;
648 
649 	set_bit_inv(apid, matrix_mdev->matrix.apm);
650 
651 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
652 	if (ret)
653 		goto share_err;
654 
655 	ret = count;
656 	goto done;
657 
658 share_err:
659 	clear_bit_inv(apid, matrix_mdev->matrix.apm);
660 done:
661 	mutex_unlock(&matrix_dev->lock);
662 
663 	return ret;
664 }
665 static DEVICE_ATTR_WO(assign_adapter);
666 
667 /**
668  * unassign_adapter_store - parses the APID from @buf and clears the
669  * corresponding bit in the mediated matrix device's APM
670  *
671  * @dev:	the matrix device
672  * @attr:	the mediated matrix device's unassign_adapter attribute
673  * @buf:	a buffer containing the adapter number (APID) to be unassigned
674  * @count:	the number of bytes in @buf
675  *
676  * Return: the number of bytes processed if the APID is valid; otherwise,
677  * returns one of the following errors:
678  *	-EINVAL if the APID is not a number
679  *	-ENODEV if the APID it exceeds the maximum value configured for the
680  *		system
681  */
682 static ssize_t unassign_adapter_store(struct device *dev,
683 				      struct device_attribute *attr,
684 				      const char *buf, size_t count)
685 {
686 	int ret;
687 	unsigned long apid;
688 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
689 
690 	mutex_lock(&matrix_dev->lock);
691 
692 	/* If the KVM guest is running, disallow unassignment of adapter */
693 	if (matrix_mdev->kvm) {
694 		ret = -EBUSY;
695 		goto done;
696 	}
697 
698 	ret = kstrtoul(buf, 0, &apid);
699 	if (ret)
700 		goto done;
701 
702 	if (apid > matrix_mdev->matrix.apm_max) {
703 		ret = -ENODEV;
704 		goto done;
705 	}
706 
707 	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
708 	ret = count;
709 done:
710 	mutex_unlock(&matrix_dev->lock);
711 	return ret;
712 }
713 static DEVICE_ATTR_WO(unassign_adapter);
714 
715 static int
716 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
717 					     unsigned long apqi)
718 {
719 	int ret;
720 	unsigned long apid;
721 	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
722 
723 	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
724 		return vfio_ap_verify_queue_reserved(NULL, &apqi);
725 
726 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
727 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
728 		if (ret)
729 			return ret;
730 	}
731 
732 	return 0;
733 }
734 
735 /**
736  * assign_domain_store - parses the APQI from @buf and sets the
737  * corresponding bit in the mediated matrix device's AQM
738  *
739  *
740  * @dev:	the matrix device
741  * @attr:	the mediated matrix device's assign_domain attribute
742  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
743  *		be assigned
744  * @count:	the number of bytes in @buf
745  *
746  * Return: the number of bytes processed if the APQI is valid; otherwise returns
747  * one of the following errors:
748  *
749  *	1. -EINVAL
750  *	   The APQI is not a valid number
751  *
752  *	2. -ENODEV
753  *	   The APQI exceeds the maximum value configured for the system
754  *
755  *	3. -EADDRNOTAVAIL
756  *	   An APQN derived from the cross product of the APQI being assigned
757  *	   and the APIDs previously assigned is not bound to the vfio_ap device
758  *	   driver; or, if no APIDs have yet been assigned, the APQI is not
759  *	   contained in an APQN bound to the vfio_ap device driver.
760  *
761  *	4. -EADDRINUSE
762  *	   An APQN derived from the cross product of the APQI being assigned
763  *	   and the APIDs previously assigned is being used by another mediated
764  *	   matrix device
765  */
766 static ssize_t assign_domain_store(struct device *dev,
767 				   struct device_attribute *attr,
768 				   const char *buf, size_t count)
769 {
770 	int ret;
771 	unsigned long apqi;
772 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
773 	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
774 
775 	mutex_lock(&matrix_dev->lock);
776 
777 	/* If the KVM guest is running, disallow assignment of domain */
778 	if (matrix_mdev->kvm) {
779 		ret = -EBUSY;
780 		goto done;
781 	}
782 
783 	ret = kstrtoul(buf, 0, &apqi);
784 	if (ret)
785 		goto done;
786 	if (apqi > max_apqi) {
787 		ret = -ENODEV;
788 		goto done;
789 	}
790 
791 	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
792 	if (ret)
793 		goto done;
794 
795 	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
796 
797 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
798 	if (ret)
799 		goto share_err;
800 
801 	ret = count;
802 	goto done;
803 
804 share_err:
805 	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
806 done:
807 	mutex_unlock(&matrix_dev->lock);
808 
809 	return ret;
810 }
811 static DEVICE_ATTR_WO(assign_domain);
812 
813 
814 /**
815  * unassign_domain_store - parses the APQI from @buf and clears the
816  * corresponding bit in the mediated matrix device's AQM
817  *
818  * @dev:	the matrix device
819  * @attr:	the mediated matrix device's unassign_domain attribute
820  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
821  *		be unassigned
822  * @count:	the number of bytes in @buf
823  *
824  * Return: the number of bytes processed if the APQI is valid; otherwise,
825  * returns one of the following errors:
826  *	-EINVAL if the APQI is not a number
827  *	-ENODEV if the APQI exceeds the maximum value configured for the system
828  */
829 static ssize_t unassign_domain_store(struct device *dev,
830 				     struct device_attribute *attr,
831 				     const char *buf, size_t count)
832 {
833 	int ret;
834 	unsigned long apqi;
835 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
836 
837 	mutex_lock(&matrix_dev->lock);
838 
839 	/* If the KVM guest is running, disallow unassignment of domain */
840 	if (matrix_mdev->kvm) {
841 		ret = -EBUSY;
842 		goto done;
843 	}
844 
845 	ret = kstrtoul(buf, 0, &apqi);
846 	if (ret)
847 		goto done;
848 
849 	if (apqi > matrix_mdev->matrix.aqm_max) {
850 		ret = -ENODEV;
851 		goto done;
852 	}
853 
854 	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
855 	ret = count;
856 
857 done:
858 	mutex_unlock(&matrix_dev->lock);
859 	return ret;
860 }
861 static DEVICE_ATTR_WO(unassign_domain);
862 
863 /**
864  * assign_control_domain_store - parses the domain ID from @buf and sets
865  * the corresponding bit in the mediated matrix device's ADM
866  *
867  *
868  * @dev:	the matrix device
869  * @attr:	the mediated matrix device's assign_control_domain attribute
870  * @buf:	a buffer containing the domain ID to be assigned
871  * @count:	the number of bytes in @buf
872  *
873  * Return: the number of bytes processed if the domain ID is valid; otherwise,
874  * returns one of the following errors:
875  *	-EINVAL if the ID is not a number
876  *	-ENODEV if the ID exceeds the maximum value configured for the system
877  */
878 static ssize_t assign_control_domain_store(struct device *dev,
879 					   struct device_attribute *attr,
880 					   const char *buf, size_t count)
881 {
882 	int ret;
883 	unsigned long id;
884 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
885 
886 	mutex_lock(&matrix_dev->lock);
887 
888 	/* If the KVM guest is running, disallow assignment of control domain */
889 	if (matrix_mdev->kvm) {
890 		ret = -EBUSY;
891 		goto done;
892 	}
893 
894 	ret = kstrtoul(buf, 0, &id);
895 	if (ret)
896 		goto done;
897 
898 	if (id > matrix_mdev->matrix.adm_max) {
899 		ret = -ENODEV;
900 		goto done;
901 	}
902 
903 	/* Set the bit in the ADM (bitmask) corresponding to the AP control
904 	 * domain number (id). The bits in the mask, from most significant to
905 	 * least significant, correspond to IDs 0 up to the one less than the
906 	 * number of control domains that can be assigned.
907 	 */
908 	set_bit_inv(id, matrix_mdev->matrix.adm);
909 	ret = count;
910 done:
911 	mutex_unlock(&matrix_dev->lock);
912 	return ret;
913 }
914 static DEVICE_ATTR_WO(assign_control_domain);
915 
916 /**
917  * unassign_control_domain_store - parses the domain ID from @buf and
918  * clears the corresponding bit in the mediated matrix device's ADM
919  *
920  * @dev:	the matrix device
921  * @attr:	the mediated matrix device's unassign_control_domain attribute
922  * @buf:	a buffer containing the domain ID to be unassigned
923  * @count:	the number of bytes in @buf
924  *
925  * Return: the number of bytes processed if the domain ID is valid; otherwise,
926  * returns one of the following errors:
927  *	-EINVAL if the ID is not a number
928  *	-ENODEV if the ID exceeds the maximum value configured for the system
929  */
930 static ssize_t unassign_control_domain_store(struct device *dev,
931 					     struct device_attribute *attr,
932 					     const char *buf, size_t count)
933 {
934 	int ret;
935 	unsigned long domid;
936 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
937 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
938 
939 	mutex_lock(&matrix_dev->lock);
940 
941 	/* If a KVM guest is running, disallow unassignment of control domain */
942 	if (matrix_mdev->kvm) {
943 		ret = -EBUSY;
944 		goto done;
945 	}
946 
947 	ret = kstrtoul(buf, 0, &domid);
948 	if (ret)
949 		goto done;
950 	if (domid > max_domid) {
951 		ret = -ENODEV;
952 		goto done;
953 	}
954 
955 	clear_bit_inv(domid, matrix_mdev->matrix.adm);
956 	ret = count;
957 done:
958 	mutex_unlock(&matrix_dev->lock);
959 	return ret;
960 }
961 static DEVICE_ATTR_WO(unassign_control_domain);
962 
963 static ssize_t control_domains_show(struct device *dev,
964 				    struct device_attribute *dev_attr,
965 				    char *buf)
966 {
967 	unsigned long id;
968 	int nchars = 0;
969 	int n;
970 	char *bufpos = buf;
971 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
972 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
973 
974 	mutex_lock(&matrix_dev->lock);
975 	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
976 		n = sprintf(bufpos, "%04lx\n", id);
977 		bufpos += n;
978 		nchars += n;
979 	}
980 	mutex_unlock(&matrix_dev->lock);
981 
982 	return nchars;
983 }
984 static DEVICE_ATTR_RO(control_domains);
985 
986 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
987 			   char *buf)
988 {
989 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
990 	char *bufpos = buf;
991 	unsigned long apid;
992 	unsigned long apqi;
993 	unsigned long apid1;
994 	unsigned long apqi1;
995 	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
996 	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
997 	int nchars = 0;
998 	int n;
999 
1000 	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1001 	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1002 
1003 	mutex_lock(&matrix_dev->lock);
1004 
1005 	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1006 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1007 			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1008 					     naqm_bits) {
1009 				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1010 					    apqi);
1011 				bufpos += n;
1012 				nchars += n;
1013 			}
1014 		}
1015 	} else if (apid1 < napm_bits) {
1016 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1017 			n = sprintf(bufpos, "%02lx.\n", apid);
1018 			bufpos += n;
1019 			nchars += n;
1020 		}
1021 	} else if (apqi1 < naqm_bits) {
1022 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1023 			n = sprintf(bufpos, ".%04lx\n", apqi);
1024 			bufpos += n;
1025 			nchars += n;
1026 		}
1027 	}
1028 
1029 	mutex_unlock(&matrix_dev->lock);
1030 
1031 	return nchars;
1032 }
1033 static DEVICE_ATTR_RO(matrix);
1034 
1035 static struct attribute *vfio_ap_mdev_attrs[] = {
1036 	&dev_attr_assign_adapter.attr,
1037 	&dev_attr_unassign_adapter.attr,
1038 	&dev_attr_assign_domain.attr,
1039 	&dev_attr_unassign_domain.attr,
1040 	&dev_attr_assign_control_domain.attr,
1041 	&dev_attr_unassign_control_domain.attr,
1042 	&dev_attr_control_domains.attr,
1043 	&dev_attr_matrix.attr,
1044 	NULL,
1045 };
1046 
1047 static struct attribute_group vfio_ap_mdev_attr_group = {
1048 	.attrs = vfio_ap_mdev_attrs
1049 };
1050 
1051 static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
1052 	&vfio_ap_mdev_attr_group,
1053 	NULL
1054 };
1055 
1056 /**
1057  * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1058  * to manage AP resources for the guest whose state is represented by @kvm
1059  *
1060  * @matrix_mdev: a mediated matrix device
1061  * @kvm: reference to KVM instance
1062  *
1063  * Note: The matrix_dev->lock must be taken prior to calling
1064  * this function; however, the lock will be temporarily released while the
1065  * guest's AP configuration is set to avoid a potential lockdep splat.
1066  * The kvm->lock is taken to set the guest's AP configuration which, under
1067  * certain circumstances, will result in a circular lock dependency if this is
1068  * done under the @matrix_mdev->lock.
1069  *
1070  * Return: 0 if no other mediated matrix device has a reference to @kvm;
1071  * otherwise, returns an -EPERM.
1072  */
1073 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1074 				struct kvm *kvm)
1075 {
1076 	struct ap_matrix_mdev *m;
1077 
1078 	if (kvm->arch.crypto.crycbd) {
1079 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1080 		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1081 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1082 
1083 		mutex_lock(&kvm->lock);
1084 		mutex_lock(&matrix_dev->lock);
1085 
1086 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1087 			if (m != matrix_mdev && m->kvm == kvm) {
1088 				mutex_unlock(&kvm->lock);
1089 				mutex_unlock(&matrix_dev->lock);
1090 				return -EPERM;
1091 			}
1092 		}
1093 
1094 		kvm_get_kvm(kvm);
1095 		matrix_mdev->kvm = kvm;
1096 		kvm_arch_crypto_set_masks(kvm,
1097 					  matrix_mdev->matrix.apm,
1098 					  matrix_mdev->matrix.aqm,
1099 					  matrix_mdev->matrix.adm);
1100 
1101 		mutex_unlock(&kvm->lock);
1102 		mutex_unlock(&matrix_dev->lock);
1103 	}
1104 
1105 	return 0;
1106 }
1107 
1108 /**
1109  * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
1110  *
1111  * @nb: The notifier block
1112  * @action: Action to be taken
1113  * @data: data associated with the request
1114  *
1115  * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1116  * pinned before). Other requests are ignored.
1117  *
1118  * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
1119  */
1120 static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1121 				       unsigned long action, void *data)
1122 {
1123 	struct ap_matrix_mdev *matrix_mdev;
1124 
1125 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1126 
1127 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1128 		struct vfio_iommu_type1_dma_unmap *unmap = data;
1129 		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1130 
1131 		vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1132 		return NOTIFY_OK;
1133 	}
1134 
1135 	return NOTIFY_DONE;
1136 }
1137 
1138 /**
1139  * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1140  * by @matrix_mdev.
1141  *
1142  * @matrix_mdev: a matrix mediated device
1143  *
1144  * Note: The matrix_dev->lock must be taken prior to calling
1145  * this function; however, the lock will be temporarily released while the
1146  * guest's AP configuration is cleared to avoid a potential lockdep splat.
1147  * The kvm->lock is taken to clear the guest's AP configuration which, under
1148  * certain circumstances, will result in a circular lock dependency if this is
1149  * done under the @matrix_mdev->lock.
1150  */
1151 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev,
1152 				   struct kvm *kvm)
1153 {
1154 	if (kvm && kvm->arch.crypto.crycbd) {
1155 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1156 		kvm->arch.crypto.pqap_hook = NULL;
1157 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1158 
1159 		mutex_lock(&kvm->lock);
1160 		mutex_lock(&matrix_dev->lock);
1161 
1162 		kvm_arch_crypto_clear_masks(kvm);
1163 		vfio_ap_mdev_reset_queues(matrix_mdev);
1164 		kvm_put_kvm(kvm);
1165 		matrix_mdev->kvm = NULL;
1166 
1167 		mutex_unlock(&kvm->lock);
1168 		mutex_unlock(&matrix_dev->lock);
1169 	}
1170 }
1171 
1172 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
1173 				       unsigned long action, void *data)
1174 {
1175 	int notify_rc = NOTIFY_OK;
1176 	struct ap_matrix_mdev *matrix_mdev;
1177 
1178 	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
1179 		return NOTIFY_OK;
1180 
1181 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
1182 
1183 	if (!data)
1184 		vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
1185 	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
1186 		notify_rc = NOTIFY_DONE;
1187 
1188 	return notify_rc;
1189 }
1190 
1191 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1192 {
1193 	struct device *dev;
1194 	struct vfio_ap_queue *q = NULL;
1195 
1196 	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1197 				 &apqn, match_apqn);
1198 	if (dev) {
1199 		q = dev_get_drvdata(dev);
1200 		put_device(dev);
1201 	}
1202 
1203 	return q;
1204 }
1205 
1206 int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1207 			     unsigned int retry)
1208 {
1209 	struct ap_queue_status status;
1210 	int ret;
1211 	int retry2 = 2;
1212 
1213 	if (!q)
1214 		return 0;
1215 
1216 retry_zapq:
1217 	status = ap_zapq(q->apqn);
1218 	switch (status.response_code) {
1219 	case AP_RESPONSE_NORMAL:
1220 		ret = 0;
1221 		break;
1222 	case AP_RESPONSE_RESET_IN_PROGRESS:
1223 		if (retry--) {
1224 			msleep(20);
1225 			goto retry_zapq;
1226 		}
1227 		ret = -EBUSY;
1228 		break;
1229 	case AP_RESPONSE_Q_NOT_AVAIL:
1230 	case AP_RESPONSE_DECONFIGURED:
1231 	case AP_RESPONSE_CHECKSTOPPED:
1232 		WARN_ON_ONCE(status.irq_enabled);
1233 		ret = -EBUSY;
1234 		goto free_resources;
1235 	default:
1236 		/* things are really broken, give up */
1237 		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1238 		     status.response_code);
1239 		return -EIO;
1240 	}
1241 
1242 	/* wait for the reset to take effect */
1243 	while (retry2--) {
1244 		if (status.queue_empty && !status.irq_enabled)
1245 			break;
1246 		msleep(20);
1247 		status = ap_tapq(q->apqn, NULL);
1248 	}
1249 	WARN_ON_ONCE(retry2 <= 0);
1250 
1251 free_resources:
1252 	vfio_ap_free_aqic_resources(q);
1253 
1254 	return ret;
1255 }
1256 
1257 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1258 {
1259 	int ret;
1260 	int rc = 0;
1261 	unsigned long apid, apqi;
1262 	struct vfio_ap_queue *q;
1263 
1264 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1265 			     matrix_mdev->matrix.apm_max + 1) {
1266 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1267 				     matrix_mdev->matrix.aqm_max + 1) {
1268 			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1269 			ret = vfio_ap_mdev_reset_queue(q, 1);
1270 			/*
1271 			 * Regardless whether a queue turns out to be busy, or
1272 			 * is not operational, we need to continue resetting
1273 			 * the remaining queues.
1274 			 */
1275 			if (ret)
1276 				rc = ret;
1277 		}
1278 	}
1279 
1280 	return rc;
1281 }
1282 
1283 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1284 {
1285 	struct ap_matrix_mdev *matrix_mdev =
1286 		container_of(vdev, struct ap_matrix_mdev, vdev);
1287 	unsigned long events;
1288 	int ret;
1289 
1290 	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
1291 	events = VFIO_GROUP_NOTIFY_SET_KVM;
1292 
1293 	ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1294 				     &events, &matrix_mdev->group_notifier);
1295 	if (ret)
1296 		return ret;
1297 
1298 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1299 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1300 	ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1301 				     &events, &matrix_mdev->iommu_notifier);
1302 	if (ret)
1303 		goto out_unregister_group;
1304 	return 0;
1305 
1306 out_unregister_group:
1307 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1308 				 &matrix_mdev->group_notifier);
1309 	return ret;
1310 }
1311 
1312 static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
1313 {
1314 	struct ap_matrix_mdev *matrix_mdev =
1315 		container_of(vdev, struct ap_matrix_mdev, vdev);
1316 
1317 	vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1318 				 &matrix_mdev->iommu_notifier);
1319 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1320 				 &matrix_mdev->group_notifier);
1321 	vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm);
1322 }
1323 
1324 static int vfio_ap_mdev_get_device_info(unsigned long arg)
1325 {
1326 	unsigned long minsz;
1327 	struct vfio_device_info info;
1328 
1329 	minsz = offsetofend(struct vfio_device_info, num_irqs);
1330 
1331 	if (copy_from_user(&info, (void __user *)arg, minsz))
1332 		return -EFAULT;
1333 
1334 	if (info.argsz < minsz)
1335 		return -EINVAL;
1336 
1337 	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1338 	info.num_regions = 0;
1339 	info.num_irqs = 0;
1340 
1341 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1342 }
1343 
1344 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1345 				    unsigned int cmd, unsigned long arg)
1346 {
1347 	struct ap_matrix_mdev *matrix_mdev =
1348 		container_of(vdev, struct ap_matrix_mdev, vdev);
1349 	int ret;
1350 
1351 	mutex_lock(&matrix_dev->lock);
1352 	switch (cmd) {
1353 	case VFIO_DEVICE_GET_INFO:
1354 		ret = vfio_ap_mdev_get_device_info(arg);
1355 		break;
1356 	case VFIO_DEVICE_RESET:
1357 		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1358 		break;
1359 	default:
1360 		ret = -EOPNOTSUPP;
1361 		break;
1362 	}
1363 	mutex_unlock(&matrix_dev->lock);
1364 
1365 	return ret;
1366 }
1367 
1368 static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
1369 	.open_device = vfio_ap_mdev_open_device,
1370 	.close_device = vfio_ap_mdev_close_device,
1371 	.ioctl = vfio_ap_mdev_ioctl,
1372 };
1373 
1374 static struct mdev_driver vfio_ap_matrix_driver = {
1375 	.driver = {
1376 		.name = "vfio_ap_mdev",
1377 		.owner = THIS_MODULE,
1378 		.mod_name = KBUILD_MODNAME,
1379 		.dev_groups = vfio_ap_mdev_attr_groups,
1380 	},
1381 	.probe = vfio_ap_mdev_probe,
1382 	.remove = vfio_ap_mdev_remove,
1383 };
1384 
1385 static const struct mdev_parent_ops vfio_ap_matrix_ops = {
1386 	.owner			= THIS_MODULE,
1387 	.device_driver		= &vfio_ap_matrix_driver,
1388 	.supported_type_groups	= vfio_ap_mdev_type_groups,
1389 };
1390 
1391 int vfio_ap_mdev_register(void)
1392 {
1393 	int ret;
1394 
1395 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1396 
1397 	ret = mdev_register_driver(&vfio_ap_matrix_driver);
1398 	if (ret)
1399 		return ret;
1400 
1401 	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
1402 	if (ret)
1403 		goto err_driver;
1404 	return 0;
1405 
1406 err_driver:
1407 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1408 	return ret;
1409 }
1410 
1411 void vfio_ap_mdev_unregister(void)
1412 {
1413 	mdev_unregister_device(&matrix_dev->device);
1414 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1415 }
1416