xref: /linux/drivers/s390/crypto/vfio_ap_ops.c (revision 50f2944009a25bb39a09f2f7bab64a73ce928bef)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Adjunct processor matrix VFIO device driver callbacks.
4  *
5  * Copyright IBM Corp. 2018
6  *
7  * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8  *	      Halil Pasic <pasic@linux.ibm.com>
9  *	      Pierre Morel <pmorel@linux.ibm.com>
10  */
11 #include <linux/string.h>
12 #include <linux/vfio.h>
13 #include <linux/device.h>
14 #include <linux/list.h>
15 #include <linux/ctype.h>
16 #include <linux/bitops.h>
17 #include <linux/kvm_host.h>
18 #include <linux/module.h>
19 #include <linux/uuid.h>
20 #include <asm/kvm.h>
21 #include <asm/zcrypt.h>
22 
23 #include "vfio_ap_private.h"
24 #include "vfio_ap_debug.h"
25 
/* Name given to the mdev type attribute group defined later in this file. */
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
/* Human-readable string reported by the "name" mdev type attribute. */
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"

/*
 * Forward declarations: these symbols are referenced by code in this file
 * before the point where they are defined.
 */
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
32 
33 static int match_apqn(struct device *dev, const void *data)
34 {
35 	struct vfio_ap_queue *q = dev_get_drvdata(dev);
36 
37 	return (q->apqn == *(int *)(data)) ? 1 : 0;
38 }
39 
40 /**
41  * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
42  * @matrix_mdev: the associated mediated matrix
43  * @apqn: The queue APQN
44  *
45  * Retrieve a queue with a specific APQN from the list of the
46  * devices of the vfio_ap_drv.
47  * Verify that the APID and the APQI are set in the matrix.
48  *
49  * Return: the pointer to the associated vfio_ap_queue
50  */
51 static struct vfio_ap_queue *vfio_ap_get_queue(
52 					struct ap_matrix_mdev *matrix_mdev,
53 					int apqn)
54 {
55 	struct vfio_ap_queue *q;
56 
57 	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
58 		return NULL;
59 	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
60 		return NULL;
61 
62 	q = vfio_ap_find_queue(apqn);
63 	if (q)
64 		q->matrix_mdev = matrix_mdev;
65 
66 	return q;
67 }
68 
69 /**
70  * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
71  * @apqn: The AP Queue number
72  *
73  * Checks the IRQ bit for the status of this APQN using ap_tapq.
74  * Returns if the ap_tapq function succeeded and the bit is clear.
75  * Returns if ap_tapq function failed with invalid, deconfigured or
76  * checkstopped AP.
77  * Otherwise retries up to 5 times after waiting 20ms.
78  */
79 static void vfio_ap_wait_for_irqclear(int apqn)
80 {
81 	struct ap_queue_status status;
82 	int retry = 5;
83 
84 	do {
85 		status = ap_tapq(apqn, NULL);
86 		switch (status.response_code) {
87 		case AP_RESPONSE_NORMAL:
88 		case AP_RESPONSE_RESET_IN_PROGRESS:
89 			if (!status.irq_enabled)
90 				return;
91 			fallthrough;
92 		case AP_RESPONSE_BUSY:
93 			msleep(20);
94 			break;
95 		case AP_RESPONSE_Q_NOT_AVAIL:
96 		case AP_RESPONSE_DECONFIGURED:
97 		case AP_RESPONSE_CHECKSTOPPED:
98 		default:
99 			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
100 				  status.response_code, apqn);
101 			return;
102 		}
103 	} while (--retry);
104 
105 	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
106 		  __func__, status.response_code, apqn);
107 }
108 
109 /**
110  * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
111  * @q: The vfio_ap_queue
112  *
113  * Unregisters the ISC in the GIB when the saved ISC not invalid.
114  * Unpins the guest's page holding the NIB when it exists.
115  * Resets the saved_pfn and saved_isc to invalid values.
116  */
117 static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
118 {
119 	if (!q)
120 		return;
121 	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
122 	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
123 		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
124 		q->saved_isc = VFIO_AP_ISC_INVALID;
125 	}
126 	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
127 		vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1);
128 		q->saved_pfn = 0;
129 	}
130 }
131 
132 /**
133  * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
134  * @q: The vfio_ap_queue
135  *
136  * Uses ap_aqic to disable the interruption and in case of success, reset
137  * in progress or IRQ disable command already proceeded: calls
138  * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
139  * and calls vfio_ap_free_aqic_resources() to free the resources associated
140  * with the AP interrupt handling.
141  *
142  * In the case the AP is busy, or a reset is in progress,
143  * retries after 20ms, up to 5 times.
144  *
145  * Returns if ap_aqic function failed with invalid, deconfigured or
146  * checkstopped AP.
147  *
148  * Return: &struct ap_queue_status
149  */
150 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
151 {
152 	struct ap_qirq_ctrl aqic_gisa = {};
153 	struct ap_queue_status status;
154 	int retries = 5;
155 
156 	do {
157 		status = ap_aqic(q->apqn, aqic_gisa, NULL);
158 		switch (status.response_code) {
159 		case AP_RESPONSE_OTHERWISE_CHANGED:
160 		case AP_RESPONSE_NORMAL:
161 			vfio_ap_wait_for_irqclear(q->apqn);
162 			goto end_free;
163 		case AP_RESPONSE_RESET_IN_PROGRESS:
164 		case AP_RESPONSE_BUSY:
165 			msleep(20);
166 			break;
167 		case AP_RESPONSE_Q_NOT_AVAIL:
168 		case AP_RESPONSE_DECONFIGURED:
169 		case AP_RESPONSE_CHECKSTOPPED:
170 		case AP_RESPONSE_INVALID_ADDRESS:
171 		default:
172 			/* All cases in default means AP not operational */
173 			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
174 				  status.response_code);
175 			goto end_free;
176 		}
177 	} while (retries--);
178 
179 	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
180 		  status.response_code);
181 end_free:
182 	vfio_ap_free_aqic_resources(q);
183 	q->matrix_mdev = NULL;
184 	return status;
185 }
186 
187 /**
188  * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
189  *
190  * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
191  * @nib: the location for storing the nib address.
192  * @g_pfn: the location for storing the page frame number of the page containing
193  *	   the nib.
194  *
195  * When the PQAP(AQIC) instruction is executed, general register 2 contains the
196  * address of the notification indicator byte (nib) used for IRQ notification.
197  * This function parses the nib from gr2 and calculates the page frame
198  * number for the guest of the page containing the nib. The values are
199  * stored in @nib and @g_pfn respectively.
200  *
201  * The g_pfn of the nib is then validated to ensure the nib address is valid.
202  *
203  * Return: returns zero if the nib address is a valid; otherwise, returns
204  *	   -EINVAL.
205  */
206 static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib,
207 				unsigned long *g_pfn)
208 {
209 	*nib = vcpu->run->s.regs.gprs[2];
210 	*g_pfn = *nib >> PAGE_SHIFT;
211 
212 	if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn)))
213 		return -EINVAL;
214 
215 	return 0;
216 }
217 
218 /**
219  * vfio_ap_irq_enable - Enable Interruption for a APQN
220  *
221  * @q:	 the vfio_ap_queue holding AQIC parameters
222  * @isc: the guest ISC to register with the GIB interface
223  * @vcpu: the vcpu object containing the registers specifying the parameters
224  *	  passed to the PQAP(AQIC) instruction.
225  *
226  * Pin the NIB saved in *q
227  * Register the guest ISC to GIB interface and retrieve the
228  * host ISC to issue the host side PQAP/AQIC
229  *
230  * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
231  * vfio_pin_pages failed.
232  *
233  * Otherwise return the ap_queue_status returned by the ap_aqic(),
234  * all retry handling will be done by the guest.
235  *
236  * Return: &struct ap_queue_status
237  */
238 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
239 						 int isc,
240 						 struct kvm_vcpu *vcpu)
241 {
242 	unsigned long nib;
243 	struct ap_qirq_ctrl aqic_gisa = {};
244 	struct ap_queue_status status = {};
245 	struct kvm_s390_gisa *gisa;
246 	int nisc;
247 	struct kvm *kvm;
248 	unsigned long h_nib, g_pfn, h_pfn;
249 	int ret;
250 
251 	/* Verify that the notification indicator byte address is valid */
252 	if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) {
253 		VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
254 				 __func__, nib, g_pfn, q->apqn);
255 
256 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
257 		return status;
258 	}
259 
260 	ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1,
261 			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
262 	switch (ret) {
263 	case 1:
264 		break;
265 	default:
266 		VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d,"
267 				 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
268 				 __func__, ret, nib, g_pfn, q->apqn);
269 
270 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
271 		return status;
272 	}
273 
274 	kvm = q->matrix_mdev->kvm;
275 	gisa = kvm->arch.gisa_int.origin;
276 
277 	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
278 	aqic_gisa.gisc = isc;
279 
280 	nisc = kvm_s390_gisc_register(kvm, isc);
281 	if (nisc < 0) {
282 		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
283 				 __func__, nisc, isc, q->apqn);
284 
285 		status.response_code = AP_RESPONSE_INVALID_GISA;
286 		return status;
287 	}
288 
289 	aqic_gisa.isc = nisc;
290 	aqic_gisa.ir = 1;
291 	aqic_gisa.gisa = (uint64_t)gisa >> 4;
292 
293 	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
294 	switch (status.response_code) {
295 	case AP_RESPONSE_NORMAL:
296 		/* See if we did clear older IRQ configuration */
297 		vfio_ap_free_aqic_resources(q);
298 		q->saved_pfn = g_pfn;
299 		q->saved_isc = isc;
300 		break;
301 	case AP_RESPONSE_OTHERWISE_CHANGED:
302 		/* We could not modify IRQ setings: clear new configuration */
303 		vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1);
304 		kvm_s390_gisc_unregister(kvm, isc);
305 		break;
306 	default:
307 		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
308 			status.response_code);
309 		vfio_ap_irq_disable(q);
310 		break;
311 	}
312 
313 	if (status.response_code != AP_RESPONSE_NORMAL) {
314 		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
315 				 "zone=%#x, ir=%#x, gisc=%#x, f=%#x,"
316 				 "gisa=%#x, isc=%#x, apqn=%#04x\n",
317 				 __func__, status.response_code,
318 				 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
319 				 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
320 				 q->apqn);
321 	}
322 
323 	return status;
324 }
325 
326 /**
327  * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
328  *				of big endian elements that can be passed by
329  *				value to an s390dbf sprintf event function to
330  *				format a UUID string.
331  *
332  * @guid: the object containing the little endian guid
333  * @uuid: a six-element array of long values that can be passed by value as
334  *	  arguments for a formatting string specifying a UUID.
335  *
336  * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
337  * event functions if the memory for the passed string is available as long as
338  * the debug feature exists. Since a mediated device can be removed at any
339  * time, it's name can not be used because %s passes the reference to the string
340  * in memory and the reference will go stale once the device is removed .
341  *
342  * The s390dbf string formatting function allows a maximum of 9 arguments for a
343  * message to be displayed in the 'sprintf' view. In order to use the bytes
344  * comprising the mediated device's UUID to display the mediated device name,
345  * they will have to be converted into an array whose elements can be passed by
346  * value to sprintf. For example:
347  *
348  * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
349  * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
350  * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
351  * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx"
352  */
353 static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
354 {
355 	/*
356 	 * The input guid is ordered in little endian, so it needs to be
357 	 * reordered for displaying a UUID as a string. This specifies the
358 	 * guid indices in proper order.
359 	 */
360 	uuid[0] = le32_to_cpup((__le32 *)guid);
361 	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
362 	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
363 	uuid[3] = *((__u16 *)&guid->b[8]);
364 	uuid[4] = *((__u16 *)&guid->b[10]);
365 	uuid[5] = *((__u32 *)&guid->b[12]);
366 }
367 
368 /**
369  * handle_pqap - PQAP instruction callback
370  *
371  * @vcpu: The vcpu on which we received the PQAP instruction
372  *
373  * Get the general register contents to initialize internal variables.
374  * REG[0]: APQN
375  * REG[1]: IR and ISC
376  * REG[2]: NIB
377  *
378  * Response.status may be set to following Response Code:
379  * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
380  * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
381  * - AP_RESPONSE_NORMAL (0) : in case of successs
382  *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
383  * We take the matrix_dev lock to ensure serialization on queues and
384  * mediated device access.
385  *
386  * Return: 0 if we could handle the request inside KVM.
387  * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
388  */
389 static int handle_pqap(struct kvm_vcpu *vcpu)
390 {
391 	uint64_t status;
392 	uint16_t apqn;
393 	unsigned long uuid[6];
394 	struct vfio_ap_queue *q;
395 	struct ap_queue_status qstatus = {
396 			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
397 	struct ap_matrix_mdev *matrix_mdev;
398 
399 	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
400 
401 	/* If we do not use the AIV facility just go to userland */
402 	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
403 		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
404 				 __func__, apqn, vcpu->arch.sie_block->eca);
405 
406 		return -EOPNOTSUPP;
407 	}
408 
409 	mutex_lock(&matrix_dev->lock);
410 	if (!vcpu->kvm->arch.crypto.pqap_hook) {
411 		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
412 				 __func__, apqn);
413 		goto out_unlock;
414 	}
415 
416 	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
417 				   struct ap_matrix_mdev, pqap_hook);
418 
419 	/* If the there is no guest using the mdev, there is nothing to do */
420 	if (!matrix_mdev->kvm) {
421 		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
422 		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
423 				 __func__, uuid[0],  uuid[1], uuid[2],
424 				 uuid[3], uuid[4], uuid[5], apqn);
425 		goto out_unlock;
426 	}
427 
428 	q = vfio_ap_get_queue(matrix_mdev, apqn);
429 	if (!q) {
430 		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
431 				 __func__, AP_QID_CARD(apqn),
432 				 AP_QID_QUEUE(apqn));
433 		goto out_unlock;
434 	}
435 
436 	status = vcpu->run->s.regs.gprs[1];
437 
438 	/* If IR bit(16) is set we enable the interrupt */
439 	if ((status >> (63 - 16)) & 0x01)
440 		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
441 	else
442 		qstatus = vfio_ap_irq_disable(q);
443 
444 out_unlock:
445 	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
446 	vcpu->run->s.regs.gprs[1] >>= 32;
447 	mutex_unlock(&matrix_dev->lock);
448 	return 0;
449 }
450 
451 static void vfio_ap_matrix_init(struct ap_config_info *info,
452 				struct ap_matrix *matrix)
453 {
454 	matrix->apm_max = info->apxa ? info->Na : 63;
455 	matrix->aqm_max = info->apxa ? info->Nd : 15;
456 	matrix->adm_max = info->apxa ? info->Nd : 15;
457 }
458 
459 static int vfio_ap_mdev_probe(struct mdev_device *mdev)
460 {
461 	struct ap_matrix_mdev *matrix_mdev;
462 	int ret;
463 
464 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
465 		return -EPERM;
466 
467 	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
468 	if (!matrix_mdev) {
469 		ret = -ENOMEM;
470 		goto err_dec_available;
471 	}
472 	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
473 			    &vfio_ap_matrix_dev_ops);
474 
475 	matrix_mdev->mdev = mdev;
476 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
477 	matrix_mdev->pqap_hook = handle_pqap;
478 	mutex_lock(&matrix_dev->lock);
479 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
480 	mutex_unlock(&matrix_dev->lock);
481 
482 	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
483 	if (ret)
484 		goto err_list;
485 	dev_set_drvdata(&mdev->dev, matrix_mdev);
486 	return 0;
487 
488 err_list:
489 	mutex_lock(&matrix_dev->lock);
490 	list_del(&matrix_mdev->node);
491 	mutex_unlock(&matrix_dev->lock);
492 	vfio_uninit_group_dev(&matrix_mdev->vdev);
493 	kfree(matrix_mdev);
494 err_dec_available:
495 	atomic_inc(&matrix_dev->available_instances);
496 	return ret;
497 }
498 
499 static void vfio_ap_mdev_remove(struct mdev_device *mdev)
500 {
501 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
502 
503 	vfio_unregister_group_dev(&matrix_mdev->vdev);
504 
505 	mutex_lock(&matrix_dev->lock);
506 	vfio_ap_mdev_reset_queues(matrix_mdev);
507 	list_del(&matrix_mdev->node);
508 	mutex_unlock(&matrix_dev->lock);
509 	vfio_uninit_group_dev(&matrix_mdev->vdev);
510 	kfree(matrix_mdev);
511 	atomic_inc(&matrix_dev->available_instances);
512 }
513 
514 static ssize_t name_show(struct mdev_type *mtype,
515 			 struct mdev_type_attribute *attr, char *buf)
516 {
517 	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
518 }
519 
520 static MDEV_TYPE_ATTR_RO(name);
521 
522 static ssize_t available_instances_show(struct mdev_type *mtype,
523 					struct mdev_type_attribute *attr,
524 					char *buf)
525 {
526 	return sprintf(buf, "%d\n",
527 		       atomic_read(&matrix_dev->available_instances));
528 }
529 
530 static MDEV_TYPE_ATTR_RO(available_instances);
531 
532 static ssize_t device_api_show(struct mdev_type *mtype,
533 			       struct mdev_type_attribute *attr, char *buf)
534 {
535 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
536 }
537 
538 static MDEV_TYPE_ATTR_RO(device_api);
539 
/*
 * NULL-terminated list of the mdev type attributes defined above:
 * name, device_api and available_instances.
 */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* Attribute group bundling the type attributes under the type's name. */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* NULL-terminated list of type attribute groups; holds the single group above. */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
556 
/*
 * Search criteria and result passed to vfio_ap_has_queue() through its
 * opaque data argument.
 */
struct vfio_ap_queue_reserved {
	/* APID to match, or NULL to match on the APQI only */
	unsigned long *apid;
	/* APQI to match, or NULL to match on the APID only */
	unsigned long *apqi;
	/* result: set to true by vfio_ap_has_queue() when a queue matches */
	bool reserved;
};
562 
563 /**
564  * vfio_ap_has_queue - determines if the AP queue containing the target in @data
565  *
566  * @dev: an AP queue device
567  * @data: a struct vfio_ap_queue_reserved reference
568  *
569  * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
570  * apid or apqi specified in @data:
571  *
572  * - If @data contains both an apid and apqi value, then @data will be flagged
573  *   as reserved if the APID and APQI fields for the AP queue device matches
574  *
575  * - If @data contains only an apid value, @data will be flagged as
576  *   reserved if the APID field in the AP queue device matches
577  *
578  * - If @data contains only an apqi value, @data will be flagged as
579  *   reserved if the APQI field in the AP queue device matches
580  *
581  * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
582  * @data does not contain either an apid or apqi.
583  */
584 static int vfio_ap_has_queue(struct device *dev, void *data)
585 {
586 	struct vfio_ap_queue_reserved *qres = data;
587 	struct ap_queue *ap_queue = to_ap_queue(dev);
588 	ap_qid_t qid;
589 	unsigned long id;
590 
591 	if (qres->apid && qres->apqi) {
592 		qid = AP_MKQID(*qres->apid, *qres->apqi);
593 		if (qid == ap_queue->qid)
594 			qres->reserved = true;
595 	} else if (qres->apid && !qres->apqi) {
596 		id = AP_QID_CARD(ap_queue->qid);
597 		if (id == *qres->apid)
598 			qres->reserved = true;
599 	} else if (!qres->apid && qres->apqi) {
600 		id = AP_QID_QUEUE(ap_queue->qid);
601 		if (id == *qres->apqi)
602 			qres->reserved = true;
603 	} else {
604 		return -EINVAL;
605 	}
606 
607 	return 0;
608 }
609 
610 /**
611  * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
612  * @apid or @aqpi is reserved
613  *
614  * @apid: an AP adapter ID
615  * @apqi: an AP queue index
616  *
617  * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
618  * driver according to the following rules:
619  *
620  * - If both @apid and @apqi are not NULL, then there must be an AP queue
621  *   device bound to the vfio_ap driver with the APQN identified by @apid and
622  *   @apqi
623  *
624  * - If only @apid is not NULL, then there must be an AP queue device bound
625  *   to the vfio_ap driver with an APQN containing @apid
626  *
627  * - If only @apqi is not NULL, then there must be an AP queue device bound
628  *   to the vfio_ap driver with an APQN containing @apqi
629  *
630  * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
631  */
632 static int vfio_ap_verify_queue_reserved(unsigned long *apid,
633 					 unsigned long *apqi)
634 {
635 	int ret;
636 	struct vfio_ap_queue_reserved qres;
637 
638 	qres.apid = apid;
639 	qres.apqi = apqi;
640 	qres.reserved = false;
641 
642 	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
643 				     &qres, vfio_ap_has_queue);
644 	if (ret)
645 		return ret;
646 
647 	if (qres.reserved)
648 		return 0;
649 
650 	return -EADDRNOTAVAIL;
651 }
652 
653 static int
654 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
655 					     unsigned long apid)
656 {
657 	int ret;
658 	unsigned long apqi;
659 	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
660 
661 	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
662 		return vfio_ap_verify_queue_reserved(&apid, NULL);
663 
664 	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
665 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
666 		if (ret)
667 			return ret;
668 	}
669 
670 	return 0;
671 }
672 
673 /**
674  * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
675  *
676  * @matrix_mdev: the mediated matrix device
677  *
678  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
679  * and AP queue indexes comprising the AP matrix are not configured for another
680  * mediated device. AP queue sharing is not allowed.
681  *
682  * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
683  */
684 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
685 {
686 	struct ap_matrix_mdev *lstdev;
687 	DECLARE_BITMAP(apm, AP_DEVICES);
688 	DECLARE_BITMAP(aqm, AP_DOMAINS);
689 
690 	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
691 		if (matrix_mdev == lstdev)
692 			continue;
693 
694 		memset(apm, 0, sizeof(apm));
695 		memset(aqm, 0, sizeof(aqm));
696 
697 		/*
698 		 * We work on full longs, as we can only exclude the leftover
699 		 * bits in non-inverse order. The leftover is all zeros.
700 		 */
701 		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
702 				lstdev->matrix.apm, AP_DEVICES))
703 			continue;
704 
705 		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
706 				lstdev->matrix.aqm, AP_DOMAINS))
707 			continue;
708 
709 		return -EADDRINUSE;
710 	}
711 
712 	return 0;
713 }
714 
715 /**
716  * assign_adapter_store - parses the APID from @buf and sets the
717  * corresponding bit in the mediated matrix device's APM
718  *
719  * @dev:	the matrix device
720  * @attr:	the mediated matrix device's assign_adapter attribute
721  * @buf:	a buffer containing the AP adapter number (APID) to
722  *		be assigned
723  * @count:	the number of bytes in @buf
724  *
725  * Return: the number of bytes processed if the APID is valid; otherwise,
726  * returns one of the following errors:
727  *
728  *	1. -EINVAL
729  *	   The APID is not a valid number
730  *
731  *	2. -ENODEV
732  *	   The APID exceeds the maximum value configured for the system
733  *
734  *	3. -EADDRNOTAVAIL
735  *	   An APQN derived from the cross product of the APID being assigned
736  *	   and the APQIs previously assigned is not bound to the vfio_ap device
737  *	   driver; or, if no APQIs have yet been assigned, the APID is not
738  *	   contained in an APQN bound to the vfio_ap device driver.
739  *
740  *	4. -EADDRINUSE
741  *	   An APQN derived from the cross product of the APID being assigned
742  *	   and the APQIs previously assigned is being used by another mediated
743  *	   matrix device
744  */
745 static ssize_t assign_adapter_store(struct device *dev,
746 				    struct device_attribute *attr,
747 				    const char *buf, size_t count)
748 {
749 	int ret;
750 	unsigned long apid;
751 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
752 
753 	mutex_lock(&matrix_dev->lock);
754 
755 	/* If the KVM guest is running, disallow assignment of adapter */
756 	if (matrix_mdev->kvm) {
757 		ret = -EBUSY;
758 		goto done;
759 	}
760 
761 	ret = kstrtoul(buf, 0, &apid);
762 	if (ret)
763 		goto done;
764 
765 	if (apid > matrix_mdev->matrix.apm_max) {
766 		ret = -ENODEV;
767 		goto done;
768 	}
769 
770 	/*
771 	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
772 	 * number (APID). The bits in the mask, from most significant to least
773 	 * significant bit, correspond to APIDs 0-255.
774 	 */
775 	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
776 	if (ret)
777 		goto done;
778 
779 	set_bit_inv(apid, matrix_mdev->matrix.apm);
780 
781 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
782 	if (ret)
783 		goto share_err;
784 
785 	ret = count;
786 	goto done;
787 
788 share_err:
789 	clear_bit_inv(apid, matrix_mdev->matrix.apm);
790 done:
791 	mutex_unlock(&matrix_dev->lock);
792 
793 	return ret;
794 }
795 static DEVICE_ATTR_WO(assign_adapter);
796 
797 /**
798  * unassign_adapter_store - parses the APID from @buf and clears the
799  * corresponding bit in the mediated matrix device's APM
800  *
801  * @dev:	the matrix device
802  * @attr:	the mediated matrix device's unassign_adapter attribute
803  * @buf:	a buffer containing the adapter number (APID) to be unassigned
804  * @count:	the number of bytes in @buf
805  *
806  * Return: the number of bytes processed if the APID is valid; otherwise,
807  * returns one of the following errors:
808  *	-EINVAL if the APID is not a number
809  *	-ENODEV if the APID it exceeds the maximum value configured for the
810  *		system
811  */
812 static ssize_t unassign_adapter_store(struct device *dev,
813 				      struct device_attribute *attr,
814 				      const char *buf, size_t count)
815 {
816 	int ret;
817 	unsigned long apid;
818 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
819 
820 	mutex_lock(&matrix_dev->lock);
821 
822 	/* If the KVM guest is running, disallow unassignment of adapter */
823 	if (matrix_mdev->kvm) {
824 		ret = -EBUSY;
825 		goto done;
826 	}
827 
828 	ret = kstrtoul(buf, 0, &apid);
829 	if (ret)
830 		goto done;
831 
832 	if (apid > matrix_mdev->matrix.apm_max) {
833 		ret = -ENODEV;
834 		goto done;
835 	}
836 
837 	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
838 	ret = count;
839 done:
840 	mutex_unlock(&matrix_dev->lock);
841 	return ret;
842 }
843 static DEVICE_ATTR_WO(unassign_adapter);
844 
845 static int
846 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
847 					     unsigned long apqi)
848 {
849 	int ret;
850 	unsigned long apid;
851 	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
852 
853 	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
854 		return vfio_ap_verify_queue_reserved(NULL, &apqi);
855 
856 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
857 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
858 		if (ret)
859 			return ret;
860 	}
861 
862 	return 0;
863 }
864 
865 /**
866  * assign_domain_store - parses the APQI from @buf and sets the
867  * corresponding bit in the mediated matrix device's AQM
868  *
869  * @dev:	the matrix device
870  * @attr:	the mediated matrix device's assign_domain attribute
871  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
872  *		be assigned
873  * @count:	the number of bytes in @buf
874  *
875  * Return: the number of bytes processed if the APQI is valid; otherwise returns
876  * one of the following errors:
877  *
878  *	1. -EINVAL
879  *	   The APQI is not a valid number
880  *
881  *	2. -ENODEV
882  *	   The APQI exceeds the maximum value configured for the system
883  *
884  *	3. -EADDRNOTAVAIL
885  *	   An APQN derived from the cross product of the APQI being assigned
886  *	   and the APIDs previously assigned is not bound to the vfio_ap device
887  *	   driver; or, if no APIDs have yet been assigned, the APQI is not
888  *	   contained in an APQN bound to the vfio_ap device driver.
889  *
890  *	4. -EADDRINUSE
891  *	   An APQN derived from the cross product of the APQI being assigned
892  *	   and the APIDs previously assigned is being used by another mediated
893  *	   matrix device
894  */
895 static ssize_t assign_domain_store(struct device *dev,
896 				   struct device_attribute *attr,
897 				   const char *buf, size_t count)
898 {
899 	int ret;
900 	unsigned long apqi;
901 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
902 	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
903 
904 	mutex_lock(&matrix_dev->lock);
905 
906 	/* If the KVM guest is running, disallow assignment of domain */
907 	if (matrix_mdev->kvm) {
908 		ret = -EBUSY;
909 		goto done;
910 	}
911 
912 	ret = kstrtoul(buf, 0, &apqi);
913 	if (ret)
914 		goto done;
915 	if (apqi > max_apqi) {
916 		ret = -ENODEV;
917 		goto done;
918 	}
919 
920 	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
921 	if (ret)
922 		goto done;
923 
924 	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
925 
926 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
927 	if (ret)
928 		goto share_err;
929 
930 	ret = count;
931 	goto done;
932 
933 share_err:
934 	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
935 done:
936 	mutex_unlock(&matrix_dev->lock);
937 
938 	return ret;
939 }
940 static DEVICE_ATTR_WO(assign_domain);
941 
942 
943 /**
944  * unassign_domain_store - parses the APQI from @buf and clears the
945  * corresponding bit in the mediated matrix device's AQM
946  *
947  * @dev:	the matrix device
948  * @attr:	the mediated matrix device's unassign_domain attribute
949  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
950  *		be unassigned
951  * @count:	the number of bytes in @buf
952  *
953  * Return: the number of bytes processed if the APQI is valid; otherwise,
954  * returns one of the following errors:
955  *	-EINVAL if the APQI is not a number
956  *	-ENODEV if the APQI exceeds the maximum value configured for the system
957  */
958 static ssize_t unassign_domain_store(struct device *dev,
959 				     struct device_attribute *attr,
960 				     const char *buf, size_t count)
961 {
962 	int ret;
963 	unsigned long apqi;
964 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
965 
966 	mutex_lock(&matrix_dev->lock);
967 
968 	/* If the KVM guest is running, disallow unassignment of domain */
969 	if (matrix_mdev->kvm) {
970 		ret = -EBUSY;
971 		goto done;
972 	}
973 
974 	ret = kstrtoul(buf, 0, &apqi);
975 	if (ret)
976 		goto done;
977 
978 	if (apqi > matrix_mdev->matrix.aqm_max) {
979 		ret = -ENODEV;
980 		goto done;
981 	}
982 
983 	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
984 	ret = count;
985 
986 done:
987 	mutex_unlock(&matrix_dev->lock);
988 	return ret;
989 }
990 static DEVICE_ATTR_WO(unassign_domain);
991 
992 /**
993  * assign_control_domain_store - parses the domain ID from @buf and sets
994  * the corresponding bit in the mediated matrix device's ADM
995  *
996  * @dev:	the matrix device
997  * @attr:	the mediated matrix device's assign_control_domain attribute
998  * @buf:	a buffer containing the domain ID to be assigned
999  * @count:	the number of bytes in @buf
1000  *
1001  * Return: the number of bytes processed if the domain ID is valid; otherwise,
1002  * returns one of the following errors:
1003  *	-EINVAL if the ID is not a number
1004  *	-ENODEV if the ID exceeds the maximum value configured for the system
1005  */
1006 static ssize_t assign_control_domain_store(struct device *dev,
1007 					   struct device_attribute *attr,
1008 					   const char *buf, size_t count)
1009 {
1010 	int ret;
1011 	unsigned long id;
1012 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1013 
1014 	mutex_lock(&matrix_dev->lock);
1015 
1016 	/* If the KVM guest is running, disallow assignment of control domain */
1017 	if (matrix_mdev->kvm) {
1018 		ret = -EBUSY;
1019 		goto done;
1020 	}
1021 
1022 	ret = kstrtoul(buf, 0, &id);
1023 	if (ret)
1024 		goto done;
1025 
1026 	if (id > matrix_mdev->matrix.adm_max) {
1027 		ret = -ENODEV;
1028 		goto done;
1029 	}
1030 
1031 	/* Set the bit in the ADM (bitmask) corresponding to the AP control
1032 	 * domain number (id). The bits in the mask, from most significant to
1033 	 * least significant, correspond to IDs 0 up to the one less than the
1034 	 * number of control domains that can be assigned.
1035 	 */
1036 	set_bit_inv(id, matrix_mdev->matrix.adm);
1037 	ret = count;
1038 done:
1039 	mutex_unlock(&matrix_dev->lock);
1040 	return ret;
1041 }
1042 static DEVICE_ATTR_WO(assign_control_domain);
1043 
1044 /**
1045  * unassign_control_domain_store - parses the domain ID from @buf and
1046  * clears the corresponding bit in the mediated matrix device's ADM
1047  *
1048  * @dev:	the matrix device
1049  * @attr:	the mediated matrix device's unassign_control_domain attribute
1050  * @buf:	a buffer containing the domain ID to be unassigned
1051  * @count:	the number of bytes in @buf
1052  *
1053  * Return: the number of bytes processed if the domain ID is valid; otherwise,
1054  * returns one of the following errors:
1055  *	-EINVAL if the ID is not a number
1056  *	-ENODEV if the ID exceeds the maximum value configured for the system
1057  */
1058 static ssize_t unassign_control_domain_store(struct device *dev,
1059 					     struct device_attribute *attr,
1060 					     const char *buf, size_t count)
1061 {
1062 	int ret;
1063 	unsigned long domid;
1064 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1065 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
1066 
1067 	mutex_lock(&matrix_dev->lock);
1068 
1069 	/* If a KVM guest is running, disallow unassignment of control domain */
1070 	if (matrix_mdev->kvm) {
1071 		ret = -EBUSY;
1072 		goto done;
1073 	}
1074 
1075 	ret = kstrtoul(buf, 0, &domid);
1076 	if (ret)
1077 		goto done;
1078 	if (domid > max_domid) {
1079 		ret = -ENODEV;
1080 		goto done;
1081 	}
1082 
1083 	clear_bit_inv(domid, matrix_mdev->matrix.adm);
1084 	ret = count;
1085 done:
1086 	mutex_unlock(&matrix_dev->lock);
1087 	return ret;
1088 }
1089 static DEVICE_ATTR_WO(unassign_control_domain);
1090 
1091 static ssize_t control_domains_show(struct device *dev,
1092 				    struct device_attribute *dev_attr,
1093 				    char *buf)
1094 {
1095 	unsigned long id;
1096 	int nchars = 0;
1097 	int n;
1098 	char *bufpos = buf;
1099 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1100 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
1101 
1102 	mutex_lock(&matrix_dev->lock);
1103 	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
1104 		n = sprintf(bufpos, "%04lx\n", id);
1105 		bufpos += n;
1106 		nchars += n;
1107 	}
1108 	mutex_unlock(&matrix_dev->lock);
1109 
1110 	return nchars;
1111 }
1112 static DEVICE_ATTR_RO(control_domains);
1113 
1114 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
1115 			   char *buf)
1116 {
1117 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1118 	char *bufpos = buf;
1119 	unsigned long apid;
1120 	unsigned long apqi;
1121 	unsigned long apid1;
1122 	unsigned long apqi1;
1123 	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
1124 	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
1125 	int nchars = 0;
1126 	int n;
1127 
1128 	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1129 	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1130 
1131 	mutex_lock(&matrix_dev->lock);
1132 
1133 	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1134 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1135 			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1136 					     naqm_bits) {
1137 				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1138 					    apqi);
1139 				bufpos += n;
1140 				nchars += n;
1141 			}
1142 		}
1143 	} else if (apid1 < napm_bits) {
1144 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1145 			n = sprintf(bufpos, "%02lx.\n", apid);
1146 			bufpos += n;
1147 			nchars += n;
1148 		}
1149 	} else if (apqi1 < naqm_bits) {
1150 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1151 			n = sprintf(bufpos, ".%04lx\n", apqi);
1152 			bufpos += n;
1153 			nchars += n;
1154 		}
1155 	}
1156 
1157 	mutex_unlock(&matrix_dev->lock);
1158 
1159 	return nchars;
1160 }
1161 static DEVICE_ATTR_RO(matrix);
1162 
/*
 * Device attributes defined in this file for a mediated matrix device:
 * write-only assign/unassign controls for adapters, domains and control
 * domains, plus the read-only control_domains and matrix listings.
 * The array is NULL-terminated.
 */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

/* Unnamed attribute group wrapping the attributes above */
static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

/* NULL-terminated group list referenced by the mdev driver's dev_groups */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1183 
1184 /**
1185  * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1186  * to manage AP resources for the guest whose state is represented by @kvm
1187  *
1188  * @matrix_mdev: a mediated matrix device
1189  * @kvm: reference to KVM instance
1190  *
1191  * Return: 0 if no other mediated matrix device has a reference to @kvm;
1192  * otherwise, returns an -EPERM.
1193  */
1194 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1195 				struct kvm *kvm)
1196 {
1197 	struct ap_matrix_mdev *m;
1198 
1199 	if (kvm->arch.crypto.crycbd) {
1200 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1201 		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1202 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1203 
1204 		mutex_lock(&kvm->lock);
1205 		mutex_lock(&matrix_dev->lock);
1206 
1207 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1208 			if (m != matrix_mdev && m->kvm == kvm) {
1209 				mutex_unlock(&kvm->lock);
1210 				mutex_unlock(&matrix_dev->lock);
1211 				return -EPERM;
1212 			}
1213 		}
1214 
1215 		kvm_get_kvm(kvm);
1216 		matrix_mdev->kvm = kvm;
1217 		kvm_arch_crypto_set_masks(kvm,
1218 					  matrix_mdev->matrix.apm,
1219 					  matrix_mdev->matrix.aqm,
1220 					  matrix_mdev->matrix.adm);
1221 
1222 		mutex_unlock(&kvm->lock);
1223 		mutex_unlock(&matrix_dev->lock);
1224 	}
1225 
1226 	return 0;
1227 }
1228 
1229 /**
1230  * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
1231  *
1232  * @nb: The notifier block
1233  * @action: Action to be taken
1234  * @data: data associated with the request
1235  *
1236  * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1237  * pinned before). Other requests are ignored.
1238  *
1239  * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
1240  */
1241 static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1242 				       unsigned long action, void *data)
1243 {
1244 	struct ap_matrix_mdev *matrix_mdev;
1245 
1246 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1247 
1248 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1249 		struct vfio_iommu_type1_dma_unmap *unmap = data;
1250 		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1251 
1252 		vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
1253 		return NOTIFY_OK;
1254 	}
1255 
1256 	return NOTIFY_DONE;
1257 }
1258 
1259 /**
1260  * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1261  * by @matrix_mdev.
1262  *
1263  * @matrix_mdev: a matrix mediated device
1264  */
1265 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
1266 {
1267 	struct kvm *kvm = matrix_mdev->kvm;
1268 
1269 	if (kvm && kvm->arch.crypto.crycbd) {
1270 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1271 		kvm->arch.crypto.pqap_hook = NULL;
1272 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1273 
1274 		mutex_lock(&kvm->lock);
1275 		mutex_lock(&matrix_dev->lock);
1276 
1277 		kvm_arch_crypto_clear_masks(kvm);
1278 		vfio_ap_mdev_reset_queues(matrix_mdev);
1279 		kvm_put_kvm(kvm);
1280 		matrix_mdev->kvm = NULL;
1281 
1282 		mutex_unlock(&kvm->lock);
1283 		mutex_unlock(&matrix_dev->lock);
1284 	}
1285 }
1286 
1287 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1288 {
1289 	struct device *dev;
1290 	struct vfio_ap_queue *q = NULL;
1291 
1292 	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1293 				 &apqn, match_apqn);
1294 	if (dev) {
1295 		q = dev_get_drvdata(dev);
1296 		put_device(dev);
1297 	}
1298 
1299 	return q;
1300 }
1301 
1302 int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1303 			     unsigned int retry)
1304 {
1305 	struct ap_queue_status status;
1306 	int ret;
1307 	int retry2 = 2;
1308 
1309 	if (!q)
1310 		return 0;
1311 
1312 retry_zapq:
1313 	status = ap_zapq(q->apqn);
1314 	switch (status.response_code) {
1315 	case AP_RESPONSE_NORMAL:
1316 		ret = 0;
1317 		break;
1318 	case AP_RESPONSE_RESET_IN_PROGRESS:
1319 		if (retry--) {
1320 			msleep(20);
1321 			goto retry_zapq;
1322 		}
1323 		ret = -EBUSY;
1324 		break;
1325 	case AP_RESPONSE_Q_NOT_AVAIL:
1326 	case AP_RESPONSE_DECONFIGURED:
1327 	case AP_RESPONSE_CHECKSTOPPED:
1328 		WARN_ON_ONCE(status.irq_enabled);
1329 		ret = -EBUSY;
1330 		goto free_resources;
1331 	default:
1332 		/* things are really broken, give up */
1333 		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1334 		     status.response_code);
1335 		return -EIO;
1336 	}
1337 
1338 	/* wait for the reset to take effect */
1339 	while (retry2--) {
1340 		if (status.queue_empty && !status.irq_enabled)
1341 			break;
1342 		msleep(20);
1343 		status = ap_tapq(q->apqn, NULL);
1344 	}
1345 	WARN_ON_ONCE(retry2 <= 0);
1346 
1347 free_resources:
1348 	vfio_ap_free_aqic_resources(q);
1349 
1350 	return ret;
1351 }
1352 
1353 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1354 {
1355 	int ret;
1356 	int rc = 0;
1357 	unsigned long apid, apqi;
1358 	struct vfio_ap_queue *q;
1359 
1360 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1361 			     matrix_mdev->matrix.apm_max + 1) {
1362 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1363 				     matrix_mdev->matrix.aqm_max + 1) {
1364 			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1365 			ret = vfio_ap_mdev_reset_queue(q, 1);
1366 			/*
1367 			 * Regardless whether a queue turns out to be busy, or
1368 			 * is not operational, we need to continue resetting
1369 			 * the remaining queues.
1370 			 */
1371 			if (ret)
1372 				rc = ret;
1373 		}
1374 	}
1375 
1376 	return rc;
1377 }
1378 
1379 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1380 {
1381 	struct ap_matrix_mdev *matrix_mdev =
1382 		container_of(vdev, struct ap_matrix_mdev, vdev);
1383 	unsigned long events;
1384 	int ret;
1385 
1386 	if (!vdev->kvm)
1387 		return -EINVAL;
1388 
1389 	ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
1390 	if (ret)
1391 		return ret;
1392 
1393 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1394 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1395 	ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
1396 				     &matrix_mdev->iommu_notifier);
1397 	if (ret)
1398 		goto err_kvm;
1399 	return 0;
1400 
1401 err_kvm:
1402 	vfio_ap_mdev_unset_kvm(matrix_mdev);
1403 	return ret;
1404 }
1405 
1406 static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
1407 {
1408 	struct ap_matrix_mdev *matrix_mdev =
1409 		container_of(vdev, struct ap_matrix_mdev, vdev);
1410 
1411 	vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
1412 				 &matrix_mdev->iommu_notifier);
1413 	vfio_ap_mdev_unset_kvm(matrix_mdev);
1414 }
1415 
1416 static int vfio_ap_mdev_get_device_info(unsigned long arg)
1417 {
1418 	unsigned long minsz;
1419 	struct vfio_device_info info;
1420 
1421 	minsz = offsetofend(struct vfio_device_info, num_irqs);
1422 
1423 	if (copy_from_user(&info, (void __user *)arg, minsz))
1424 		return -EFAULT;
1425 
1426 	if (info.argsz < minsz)
1427 		return -EINVAL;
1428 
1429 	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1430 	info.num_regions = 0;
1431 	info.num_irqs = 0;
1432 
1433 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1434 }
1435 
1436 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1437 				    unsigned int cmd, unsigned long arg)
1438 {
1439 	struct ap_matrix_mdev *matrix_mdev =
1440 		container_of(vdev, struct ap_matrix_mdev, vdev);
1441 	int ret;
1442 
1443 	mutex_lock(&matrix_dev->lock);
1444 	switch (cmd) {
1445 	case VFIO_DEVICE_GET_INFO:
1446 		ret = vfio_ap_mdev_get_device_info(arg);
1447 		break;
1448 	case VFIO_DEVICE_RESET:
1449 		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1450 		break;
1451 	default:
1452 		ret = -EOPNOTSUPP;
1453 		break;
1454 	}
1455 	mutex_unlock(&matrix_dev->lock);
1456 
1457 	return ret;
1458 }
1459 
/*
 * vfio device callbacks for the mediated matrix device: open, close and
 * ioctl handling as implemented above.
 */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};
1465 
/*
 * The mdev driver registered by vfio_ap_mdev_register(). dev_groups points at
 * the attribute groups defined above; the probe/remove callbacks and the
 * supported type groups are defined elsewhere in this file.
 */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
	.supported_type_groups = vfio_ap_mdev_type_groups,
};
1477 
1478 int vfio_ap_mdev_register(void)
1479 {
1480 	int ret;
1481 
1482 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1483 
1484 	ret = mdev_register_driver(&vfio_ap_matrix_driver);
1485 	if (ret)
1486 		return ret;
1487 
1488 	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
1489 	if (ret)
1490 		goto err_driver;
1491 	return 0;
1492 
1493 err_driver:
1494 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1495 	return ret;
1496 }
1497 
/*
 * vfio_ap_mdev_unregister - undo vfio_ap_mdev_register().
 *
 * The matrix device is unregistered before the mdev driver, i.e. in the
 * reverse of the registration order.
 */
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}
1503