xref: /linux/drivers/s390/crypto/vfio_ap_ops.c (revision 99a15348d5842b3c1f95220dc9b119ee0fe0d81b)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Adjunct processor matrix VFIO device driver callbacks.
4  *
5  * Copyright IBM Corp. 2018
6  *
7  * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
8  *	      Halil Pasic <pasic@linux.ibm.com>
9  *	      Pierre Morel <pmorel@linux.ibm.com>
10  */
#include <linux/string.h>
#include <linux/vfio.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/bitops.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/uuid.h>
#include <asm/kvm.h>
#include <asm/zcrypt.h>

#include "vfio_ap_private.h"
#include "vfio_ap_debug.h"
25 
/*
 * VFIO_AP_MDEV_TYPE_HWVIRT is the name of the mdev type attribute group
 * defined in this file; VFIO_AP_MDEV_NAME_HWVIRT is the text reported by
 * that type's "name" attribute.
 */
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"

/* Forward declarations of file-local symbols referenced before they are defined */
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
32 
33 static int match_apqn(struct device *dev, const void *data)
34 {
35 	struct vfio_ap_queue *q = dev_get_drvdata(dev);
36 
37 	return (q->apqn == *(int *)(data)) ? 1 : 0;
38 }
39 
40 /**
41  * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
42  * @matrix_mdev: the associated mediated matrix
43  * @apqn: The queue APQN
44  *
45  * Retrieve a queue with a specific APQN from the list of the
46  * devices of the vfio_ap_drv.
47  * Verify that the APID and the APQI are set in the matrix.
48  *
49  * Return: the pointer to the associated vfio_ap_queue
50  */
51 static struct vfio_ap_queue *vfio_ap_get_queue(
52 					struct ap_matrix_mdev *matrix_mdev,
53 					int apqn)
54 {
55 	struct vfio_ap_queue *q;
56 
57 	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
58 		return NULL;
59 	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
60 		return NULL;
61 
62 	q = vfio_ap_find_queue(apqn);
63 	if (q)
64 		q->matrix_mdev = matrix_mdev;
65 
66 	return q;
67 }
68 
69 /**
70  * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
71  * @apqn: The AP Queue number
72  *
73  * Checks the IRQ bit for the status of this APQN using ap_tapq.
74  * Returns if the ap_tapq function succeeded and the bit is clear.
75  * Returns if ap_tapq function failed with invalid, deconfigured or
76  * checkstopped AP.
77  * Otherwise retries up to 5 times after waiting 20ms.
78  */
79 static void vfio_ap_wait_for_irqclear(int apqn)
80 {
81 	struct ap_queue_status status;
82 	int retry = 5;
83 
84 	do {
85 		status = ap_tapq(apqn, NULL);
86 		switch (status.response_code) {
87 		case AP_RESPONSE_NORMAL:
88 		case AP_RESPONSE_RESET_IN_PROGRESS:
89 			if (!status.irq_enabled)
90 				return;
91 			fallthrough;
92 		case AP_RESPONSE_BUSY:
93 			msleep(20);
94 			break;
95 		case AP_RESPONSE_Q_NOT_AVAIL:
96 		case AP_RESPONSE_DECONFIGURED:
97 		case AP_RESPONSE_CHECKSTOPPED:
98 		default:
99 			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
100 				  status.response_code, apqn);
101 			return;
102 		}
103 	} while (--retry);
104 
105 	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
106 		  __func__, status.response_code, apqn);
107 }
108 
109 /**
110  * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
111  * @q: The vfio_ap_queue
112  *
113  * Unregisters the ISC in the GIB when the saved ISC not invalid.
114  * Unpins the guest's page holding the NIB when it exists.
115  * Resets the saved_pfn and saved_isc to invalid values.
116  */
117 static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
118 {
119 	if (!q)
120 		return;
121 	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
122 	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
123 		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
124 		q->saved_isc = VFIO_AP_ISC_INVALID;
125 	}
126 	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
127 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
128 				 &q->saved_pfn, 1);
129 		q->saved_pfn = 0;
130 	}
131 }
132 
133 /**
134  * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
135  * @q: The vfio_ap_queue
136  *
137  * Uses ap_aqic to disable the interruption and in case of success, reset
138  * in progress or IRQ disable command already proceeded: calls
139  * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
140  * and calls vfio_ap_free_aqic_resources() to free the resources associated
141  * with the AP interrupt handling.
142  *
143  * In the case the AP is busy, or a reset is in progress,
144  * retries after 20ms, up to 5 times.
145  *
146  * Returns if ap_aqic function failed with invalid, deconfigured or
147  * checkstopped AP.
148  *
149  * Return: &struct ap_queue_status
150  */
151 static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
152 {
153 	struct ap_qirq_ctrl aqic_gisa = {};
154 	struct ap_queue_status status;
155 	int retries = 5;
156 
157 	do {
158 		status = ap_aqic(q->apqn, aqic_gisa, NULL);
159 		switch (status.response_code) {
160 		case AP_RESPONSE_OTHERWISE_CHANGED:
161 		case AP_RESPONSE_NORMAL:
162 			vfio_ap_wait_for_irqclear(q->apqn);
163 			goto end_free;
164 		case AP_RESPONSE_RESET_IN_PROGRESS:
165 		case AP_RESPONSE_BUSY:
166 			msleep(20);
167 			break;
168 		case AP_RESPONSE_Q_NOT_AVAIL:
169 		case AP_RESPONSE_DECONFIGURED:
170 		case AP_RESPONSE_CHECKSTOPPED:
171 		case AP_RESPONSE_INVALID_ADDRESS:
172 		default:
173 			/* All cases in default means AP not operational */
174 			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
175 				  status.response_code);
176 			goto end_free;
177 		}
178 	} while (retries--);
179 
180 	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
181 		  status.response_code);
182 end_free:
183 	vfio_ap_free_aqic_resources(q);
184 	q->matrix_mdev = NULL;
185 	return status;
186 }
187 
188 /**
189  * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
190  *
191  * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
192  * @nib: the location for storing the nib address.
193  * @g_pfn: the location for storing the page frame number of the page containing
194  *	   the nib.
195  *
196  * When the PQAP(AQIC) instruction is executed, general register 2 contains the
197  * address of the notification indicator byte (nib) used for IRQ notification.
198  * This function parses the nib from gr2 and calculates the page frame
199  * number for the guest of the page containing the nib. The values are
200  * stored in @nib and @g_pfn respectively.
201  *
202  * The g_pfn of the nib is then validated to ensure the nib address is valid.
203  *
204  * Return: returns zero if the nib address is a valid; otherwise, returns
205  *	   -EINVAL.
206  */
207 static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib,
208 				unsigned long *g_pfn)
209 {
210 	*nib = vcpu->run->s.regs.gprs[2];
211 	*g_pfn = *nib >> PAGE_SHIFT;
212 
213 	if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn)))
214 		return -EINVAL;
215 
216 	return 0;
217 }
218 
219 /**
220  * vfio_ap_irq_enable - Enable Interruption for a APQN
221  *
222  * @q:	 the vfio_ap_queue holding AQIC parameters
223  * @isc: the guest ISC to register with the GIB interface
224  * @vcpu: the vcpu object containing the registers specifying the parameters
225  *	  passed to the PQAP(AQIC) instruction.
226  *
227  * Pin the NIB saved in *q
228  * Register the guest ISC to GIB interface and retrieve the
229  * host ISC to issue the host side PQAP/AQIC
230  *
231  * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
232  * vfio_pin_pages failed.
233  *
234  * Otherwise return the ap_queue_status returned by the ap_aqic(),
235  * all retry handling will be done by the guest.
236  *
237  * Return: &struct ap_queue_status
238  */
239 static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
240 						 int isc,
241 						 struct kvm_vcpu *vcpu)
242 {
243 	unsigned long nib;
244 	struct ap_qirq_ctrl aqic_gisa = {};
245 	struct ap_queue_status status = {};
246 	struct kvm_s390_gisa *gisa;
247 	int nisc;
248 	struct kvm *kvm;
249 	unsigned long h_nib, g_pfn, h_pfn;
250 	int ret;
251 
252 	/* Verify that the notification indicator byte address is valid */
253 	if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) {
254 		VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
255 				 __func__, nib, g_pfn, q->apqn);
256 
257 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
258 		return status;
259 	}
260 
261 	ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
262 			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
263 	switch (ret) {
264 	case 1:
265 		break;
266 	default:
267 		VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d,"
268 				 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
269 				 __func__, ret, nib, g_pfn, q->apqn);
270 
271 		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
272 		return status;
273 	}
274 
275 	kvm = q->matrix_mdev->kvm;
276 	gisa = kvm->arch.gisa_int.origin;
277 
278 	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
279 	aqic_gisa.gisc = isc;
280 
281 	nisc = kvm_s390_gisc_register(kvm, isc);
282 	if (nisc < 0) {
283 		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
284 				 __func__, nisc, isc, q->apqn);
285 
286 		status.response_code = AP_RESPONSE_INVALID_GISA;
287 		return status;
288 	}
289 
290 	aqic_gisa.isc = nisc;
291 	aqic_gisa.ir = 1;
292 	aqic_gisa.gisa = (uint64_t)gisa >> 4;
293 
294 	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
295 	switch (status.response_code) {
296 	case AP_RESPONSE_NORMAL:
297 		/* See if we did clear older IRQ configuration */
298 		vfio_ap_free_aqic_resources(q);
299 		q->saved_pfn = g_pfn;
300 		q->saved_isc = isc;
301 		break;
302 	case AP_RESPONSE_OTHERWISE_CHANGED:
303 		/* We could not modify IRQ setings: clear new configuration */
304 		vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
305 		kvm_s390_gisc_unregister(kvm, isc);
306 		break;
307 	default:
308 		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
309 			status.response_code);
310 		vfio_ap_irq_disable(q);
311 		break;
312 	}
313 
314 	if (status.response_code != AP_RESPONSE_NORMAL) {
315 		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
316 				 "zone=%#x, ir=%#x, gisc=%#x, f=%#x,"
317 				 "gisa=%#x, isc=%#x, apqn=%#04x\n",
318 				 __func__, status.response_code,
319 				 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
320 				 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
321 				 q->apqn);
322 	}
323 
324 	return status;
325 }
326 
327 /**
328  * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
329  *				of big endian elements that can be passed by
330  *				value to an s390dbf sprintf event function to
331  *				format a UUID string.
332  *
333  * @guid: the object containing the little endian guid
334  * @uuid: a six-element array of long values that can be passed by value as
335  *	  arguments for a formatting string specifying a UUID.
336  *
337  * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
338  * event functions if the memory for the passed string is available as long as
339  * the debug feature exists. Since a mediated device can be removed at any
340  * time, it's name can not be used because %s passes the reference to the string
341  * in memory and the reference will go stale once the device is removed .
342  *
343  * The s390dbf string formatting function allows a maximum of 9 arguments for a
344  * message to be displayed in the 'sprintf' view. In order to use the bytes
345  * comprising the mediated device's UUID to display the mediated device name,
346  * they will have to be converted into an array whose elements can be passed by
347  * value to sprintf. For example:
348  *
349  * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
350  * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
351  * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
352  * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx"
353  */
354 static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
355 {
356 	/*
357 	 * The input guid is ordered in little endian, so it needs to be
358 	 * reordered for displaying a UUID as a string. This specifies the
359 	 * guid indices in proper order.
360 	 */
361 	uuid[0] = le32_to_cpup((__le32 *)guid);
362 	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
363 	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
364 	uuid[3] = *((__u16 *)&guid->b[8]);
365 	uuid[4] = *((__u16 *)&guid->b[10]);
366 	uuid[5] = *((__u32 *)&guid->b[12]);
367 }
368 
369 /**
370  * handle_pqap - PQAP instruction callback
371  *
372  * @vcpu: The vcpu on which we received the PQAP instruction
373  *
374  * Get the general register contents to initialize internal variables.
375  * REG[0]: APQN
376  * REG[1]: IR and ISC
377  * REG[2]: NIB
378  *
379  * Response.status may be set to following Response Code:
380  * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
381  * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
382  * - AP_RESPONSE_NORMAL (0) : in case of successs
383  *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
384  * We take the matrix_dev lock to ensure serialization on queues and
385  * mediated device access.
386  *
387  * Return: 0 if we could handle the request inside KVM.
388  * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
389  */
390 static int handle_pqap(struct kvm_vcpu *vcpu)
391 {
392 	uint64_t status;
393 	uint16_t apqn;
394 	unsigned long uuid[6];
395 	struct vfio_ap_queue *q;
396 	struct ap_queue_status qstatus = {
397 			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
398 	struct ap_matrix_mdev *matrix_mdev;
399 
400 	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
401 
402 	/* If we do not use the AIV facility just go to userland */
403 	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
404 		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
405 				 __func__, apqn, vcpu->arch.sie_block->eca);
406 
407 		return -EOPNOTSUPP;
408 	}
409 
410 	mutex_lock(&matrix_dev->lock);
411 	if (!vcpu->kvm->arch.crypto.pqap_hook) {
412 		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
413 				 __func__, apqn);
414 		goto out_unlock;
415 	}
416 
417 	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
418 				   struct ap_matrix_mdev, pqap_hook);
419 
420 	/* If the there is no guest using the mdev, there is nothing to do */
421 	if (!matrix_mdev->kvm) {
422 		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
423 		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
424 				 __func__, uuid[0],  uuid[1], uuid[2],
425 				 uuid[3], uuid[4], uuid[5], apqn);
426 		goto out_unlock;
427 	}
428 
429 	q = vfio_ap_get_queue(matrix_mdev, apqn);
430 	if (!q) {
431 		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
432 				 __func__, AP_QID_CARD(apqn),
433 				 AP_QID_QUEUE(apqn));
434 		goto out_unlock;
435 	}
436 
437 	status = vcpu->run->s.regs.gprs[1];
438 
439 	/* If IR bit(16) is set we enable the interrupt */
440 	if ((status >> (63 - 16)) & 0x01)
441 		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
442 	else
443 		qstatus = vfio_ap_irq_disable(q);
444 
445 out_unlock:
446 	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
447 	vcpu->run->s.regs.gprs[1] >>= 32;
448 	mutex_unlock(&matrix_dev->lock);
449 	return 0;
450 }
451 
452 static void vfio_ap_matrix_init(struct ap_config_info *info,
453 				struct ap_matrix *matrix)
454 {
455 	matrix->apm_max = info->apxa ? info->Na : 63;
456 	matrix->aqm_max = info->apxa ? info->Nd : 15;
457 	matrix->adm_max = info->apxa ? info->Nd : 15;
458 }
459 
460 static int vfio_ap_mdev_probe(struct mdev_device *mdev)
461 {
462 	struct ap_matrix_mdev *matrix_mdev;
463 	int ret;
464 
465 	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
466 		return -EPERM;
467 
468 	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
469 	if (!matrix_mdev) {
470 		ret = -ENOMEM;
471 		goto err_dec_available;
472 	}
473 	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
474 			    &vfio_ap_matrix_dev_ops);
475 
476 	matrix_mdev->mdev = mdev;
477 	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
478 	matrix_mdev->pqap_hook = handle_pqap;
479 	mutex_lock(&matrix_dev->lock);
480 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
481 	mutex_unlock(&matrix_dev->lock);
482 
483 	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
484 	if (ret)
485 		goto err_list;
486 	dev_set_drvdata(&mdev->dev, matrix_mdev);
487 	return 0;
488 
489 err_list:
490 	mutex_lock(&matrix_dev->lock);
491 	list_del(&matrix_mdev->node);
492 	mutex_unlock(&matrix_dev->lock);
493 	vfio_uninit_group_dev(&matrix_mdev->vdev);
494 	kfree(matrix_mdev);
495 err_dec_available:
496 	atomic_inc(&matrix_dev->available_instances);
497 	return ret;
498 }
499 
500 static void vfio_ap_mdev_remove(struct mdev_device *mdev)
501 {
502 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
503 
504 	vfio_unregister_group_dev(&matrix_mdev->vdev);
505 
506 	mutex_lock(&matrix_dev->lock);
507 	vfio_ap_mdev_reset_queues(matrix_mdev);
508 	list_del(&matrix_mdev->node);
509 	mutex_unlock(&matrix_dev->lock);
510 	vfio_uninit_group_dev(&matrix_mdev->vdev);
511 	kfree(matrix_mdev);
512 	atomic_inc(&matrix_dev->available_instances);
513 }
514 
515 static ssize_t name_show(struct mdev_type *mtype,
516 			 struct mdev_type_attribute *attr, char *buf)
517 {
518 	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
519 }
520 
521 static MDEV_TYPE_ATTR_RO(name);
522 
523 static ssize_t available_instances_show(struct mdev_type *mtype,
524 					struct mdev_type_attribute *attr,
525 					char *buf)
526 {
527 	return sprintf(buf, "%d\n",
528 		       atomic_read(&matrix_dev->available_instances));
529 }
530 
531 static MDEV_TYPE_ATTR_RO(available_instances);
532 
533 static ssize_t device_api_show(struct mdev_type *mtype,
534 			       struct mdev_type_attribute *attr, char *buf)
535 {
536 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
537 }
538 
539 static MDEV_TYPE_ATTR_RO(device_api);
540 
/* The read-only attributes of the mdev type: name, device_api, available_instances */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* Attribute group for the mdev type, named VFIO_AP_MDEV_TYPE_HWVIRT */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* NULL-terminated list of the mdev type groups; it holds the single group above */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
557 
/**
 * struct vfio_ap_queue_reserved - search argument and result for
 *				   vfio_ap_has_queue()
 * @apid: the APID to look for, or NULL to match any adapter
 * @apqi: the APQI to look for, or NULL to match any domain
 * @reserved: set to true by vfio_ap_has_queue() once a queue device matching
 *	      the non-NULL fields has been seen
 */
struct vfio_ap_queue_reserved {
	unsigned long *apid;
	unsigned long *apqi;
	bool reserved;
};
563 
564 /**
565  * vfio_ap_has_queue - determines if the AP queue containing the target in @data
566  *
567  * @dev: an AP queue device
568  * @data: a struct vfio_ap_queue_reserved reference
569  *
570  * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
571  * apid or apqi specified in @data:
572  *
573  * - If @data contains both an apid and apqi value, then @data will be flagged
574  *   as reserved if the APID and APQI fields for the AP queue device matches
575  *
576  * - If @data contains only an apid value, @data will be flagged as
577  *   reserved if the APID field in the AP queue device matches
578  *
579  * - If @data contains only an apqi value, @data will be flagged as
580  *   reserved if the APQI field in the AP queue device matches
581  *
582  * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
583  * @data does not contain either an apid or apqi.
584  */
585 static int vfio_ap_has_queue(struct device *dev, void *data)
586 {
587 	struct vfio_ap_queue_reserved *qres = data;
588 	struct ap_queue *ap_queue = to_ap_queue(dev);
589 	ap_qid_t qid;
590 	unsigned long id;
591 
592 	if (qres->apid && qres->apqi) {
593 		qid = AP_MKQID(*qres->apid, *qres->apqi);
594 		if (qid == ap_queue->qid)
595 			qres->reserved = true;
596 	} else if (qres->apid && !qres->apqi) {
597 		id = AP_QID_CARD(ap_queue->qid);
598 		if (id == *qres->apid)
599 			qres->reserved = true;
600 	} else if (!qres->apid && qres->apqi) {
601 		id = AP_QID_QUEUE(ap_queue->qid);
602 		if (id == *qres->apqi)
603 			qres->reserved = true;
604 	} else {
605 		return -EINVAL;
606 	}
607 
608 	return 0;
609 }
610 
611 /**
612  * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
613  * @apid or @aqpi is reserved
614  *
615  * @apid: an AP adapter ID
616  * @apqi: an AP queue index
617  *
618  * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
619  * driver according to the following rules:
620  *
621  * - If both @apid and @apqi are not NULL, then there must be an AP queue
622  *   device bound to the vfio_ap driver with the APQN identified by @apid and
623  *   @apqi
624  *
625  * - If only @apid is not NULL, then there must be an AP queue device bound
626  *   to the vfio_ap driver with an APQN containing @apid
627  *
628  * - If only @apqi is not NULL, then there must be an AP queue device bound
629  *   to the vfio_ap driver with an APQN containing @apqi
630  *
631  * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
632  */
633 static int vfio_ap_verify_queue_reserved(unsigned long *apid,
634 					 unsigned long *apqi)
635 {
636 	int ret;
637 	struct vfio_ap_queue_reserved qres;
638 
639 	qres.apid = apid;
640 	qres.apqi = apqi;
641 	qres.reserved = false;
642 
643 	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
644 				     &qres, vfio_ap_has_queue);
645 	if (ret)
646 		return ret;
647 
648 	if (qres.reserved)
649 		return 0;
650 
651 	return -EADDRNOTAVAIL;
652 }
653 
654 static int
655 vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
656 					     unsigned long apid)
657 {
658 	int ret;
659 	unsigned long apqi;
660 	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
661 
662 	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
663 		return vfio_ap_verify_queue_reserved(&apid, NULL);
664 
665 	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
666 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
667 		if (ret)
668 			return ret;
669 	}
670 
671 	return 0;
672 }
673 
674 /**
675  * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
676  *
677  * @matrix_mdev: the mediated matrix device
678  *
679  * Verifies that the APQNs derived from the cross product of the AP adapter IDs
680  * and AP queue indexes comprising the AP matrix are not configured for another
681  * mediated device. AP queue sharing is not allowed.
682  *
683  * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
684  */
685 static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
686 {
687 	struct ap_matrix_mdev *lstdev;
688 	DECLARE_BITMAP(apm, AP_DEVICES);
689 	DECLARE_BITMAP(aqm, AP_DOMAINS);
690 
691 	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
692 		if (matrix_mdev == lstdev)
693 			continue;
694 
695 		memset(apm, 0, sizeof(apm));
696 		memset(aqm, 0, sizeof(aqm));
697 
698 		/*
699 		 * We work on full longs, as we can only exclude the leftover
700 		 * bits in non-inverse order. The leftover is all zeros.
701 		 */
702 		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
703 				lstdev->matrix.apm, AP_DEVICES))
704 			continue;
705 
706 		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
707 				lstdev->matrix.aqm, AP_DOMAINS))
708 			continue;
709 
710 		return -EADDRINUSE;
711 	}
712 
713 	return 0;
714 }
715 
716 /**
717  * assign_adapter_store - parses the APID from @buf and sets the
718  * corresponding bit in the mediated matrix device's APM
719  *
720  * @dev:	the matrix device
721  * @attr:	the mediated matrix device's assign_adapter attribute
722  * @buf:	a buffer containing the AP adapter number (APID) to
723  *		be assigned
724  * @count:	the number of bytes in @buf
725  *
726  * Return: the number of bytes processed if the APID is valid; otherwise,
727  * returns one of the following errors:
728  *
729  *	1. -EINVAL
730  *	   The APID is not a valid number
731  *
732  *	2. -ENODEV
733  *	   The APID exceeds the maximum value configured for the system
734  *
735  *	3. -EADDRNOTAVAIL
736  *	   An APQN derived from the cross product of the APID being assigned
737  *	   and the APQIs previously assigned is not bound to the vfio_ap device
738  *	   driver; or, if no APQIs have yet been assigned, the APID is not
739  *	   contained in an APQN bound to the vfio_ap device driver.
740  *
741  *	4. -EADDRINUSE
742  *	   An APQN derived from the cross product of the APID being assigned
743  *	   and the APQIs previously assigned is being used by another mediated
744  *	   matrix device
745  */
746 static ssize_t assign_adapter_store(struct device *dev,
747 				    struct device_attribute *attr,
748 				    const char *buf, size_t count)
749 {
750 	int ret;
751 	unsigned long apid;
752 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
753 
754 	mutex_lock(&matrix_dev->lock);
755 
756 	/* If the KVM guest is running, disallow assignment of adapter */
757 	if (matrix_mdev->kvm) {
758 		ret = -EBUSY;
759 		goto done;
760 	}
761 
762 	ret = kstrtoul(buf, 0, &apid);
763 	if (ret)
764 		goto done;
765 
766 	if (apid > matrix_mdev->matrix.apm_max) {
767 		ret = -ENODEV;
768 		goto done;
769 	}
770 
771 	/*
772 	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
773 	 * number (APID). The bits in the mask, from most significant to least
774 	 * significant bit, correspond to APIDs 0-255.
775 	 */
776 	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
777 	if (ret)
778 		goto done;
779 
780 	set_bit_inv(apid, matrix_mdev->matrix.apm);
781 
782 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
783 	if (ret)
784 		goto share_err;
785 
786 	ret = count;
787 	goto done;
788 
789 share_err:
790 	clear_bit_inv(apid, matrix_mdev->matrix.apm);
791 done:
792 	mutex_unlock(&matrix_dev->lock);
793 
794 	return ret;
795 }
796 static DEVICE_ATTR_WO(assign_adapter);
797 
798 /**
799  * unassign_adapter_store - parses the APID from @buf and clears the
800  * corresponding bit in the mediated matrix device's APM
801  *
802  * @dev:	the matrix device
803  * @attr:	the mediated matrix device's unassign_adapter attribute
804  * @buf:	a buffer containing the adapter number (APID) to be unassigned
805  * @count:	the number of bytes in @buf
806  *
807  * Return: the number of bytes processed if the APID is valid; otherwise,
808  * returns one of the following errors:
809  *	-EINVAL if the APID is not a number
810  *	-ENODEV if the APID it exceeds the maximum value configured for the
811  *		system
812  */
813 static ssize_t unassign_adapter_store(struct device *dev,
814 				      struct device_attribute *attr,
815 				      const char *buf, size_t count)
816 {
817 	int ret;
818 	unsigned long apid;
819 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
820 
821 	mutex_lock(&matrix_dev->lock);
822 
823 	/* If the KVM guest is running, disallow unassignment of adapter */
824 	if (matrix_mdev->kvm) {
825 		ret = -EBUSY;
826 		goto done;
827 	}
828 
829 	ret = kstrtoul(buf, 0, &apid);
830 	if (ret)
831 		goto done;
832 
833 	if (apid > matrix_mdev->matrix.apm_max) {
834 		ret = -ENODEV;
835 		goto done;
836 	}
837 
838 	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
839 	ret = count;
840 done:
841 	mutex_unlock(&matrix_dev->lock);
842 	return ret;
843 }
844 static DEVICE_ATTR_WO(unassign_adapter);
845 
846 static int
847 vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
848 					     unsigned long apqi)
849 {
850 	int ret;
851 	unsigned long apid;
852 	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
853 
854 	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
855 		return vfio_ap_verify_queue_reserved(NULL, &apqi);
856 
857 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
858 		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
859 		if (ret)
860 			return ret;
861 	}
862 
863 	return 0;
864 }
865 
866 /**
867  * assign_domain_store - parses the APQI from @buf and sets the
868  * corresponding bit in the mediated matrix device's AQM
869  *
870  * @dev:	the matrix device
871  * @attr:	the mediated matrix device's assign_domain attribute
872  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
873  *		be assigned
874  * @count:	the number of bytes in @buf
875  *
876  * Return: the number of bytes processed if the APQI is valid; otherwise returns
877  * one of the following errors:
878  *
879  *	1. -EINVAL
880  *	   The APQI is not a valid number
881  *
882  *	2. -ENODEV
883  *	   The APQI exceeds the maximum value configured for the system
884  *
885  *	3. -EADDRNOTAVAIL
886  *	   An APQN derived from the cross product of the APQI being assigned
887  *	   and the APIDs previously assigned is not bound to the vfio_ap device
888  *	   driver; or, if no APIDs have yet been assigned, the APQI is not
889  *	   contained in an APQN bound to the vfio_ap device driver.
890  *
891  *	4. -EADDRINUSE
892  *	   An APQN derived from the cross product of the APQI being assigned
893  *	   and the APIDs previously assigned is being used by another mediated
894  *	   matrix device
895  */
896 static ssize_t assign_domain_store(struct device *dev,
897 				   struct device_attribute *attr,
898 				   const char *buf, size_t count)
899 {
900 	int ret;
901 	unsigned long apqi;
902 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
903 	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
904 
905 	mutex_lock(&matrix_dev->lock);
906 
907 	/* If the KVM guest is running, disallow assignment of domain */
908 	if (matrix_mdev->kvm) {
909 		ret = -EBUSY;
910 		goto done;
911 	}
912 
913 	ret = kstrtoul(buf, 0, &apqi);
914 	if (ret)
915 		goto done;
916 	if (apqi > max_apqi) {
917 		ret = -ENODEV;
918 		goto done;
919 	}
920 
921 	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
922 	if (ret)
923 		goto done;
924 
925 	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
926 
927 	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
928 	if (ret)
929 		goto share_err;
930 
931 	ret = count;
932 	goto done;
933 
934 share_err:
935 	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
936 done:
937 	mutex_unlock(&matrix_dev->lock);
938 
939 	return ret;
940 }
941 static DEVICE_ATTR_WO(assign_domain);
942 
943 
944 /**
945  * unassign_domain_store - parses the APQI from @buf and clears the
946  * corresponding bit in the mediated matrix device's AQM
947  *
948  * @dev:	the matrix device
949  * @attr:	the mediated matrix device's unassign_domain attribute
950  * @buf:	a buffer containing the AP queue index (APQI) of the domain to
951  *		be unassigned
952  * @count:	the number of bytes in @buf
953  *
954  * Return: the number of bytes processed if the APQI is valid; otherwise,
955  * returns one of the following errors:
956  *	-EINVAL if the APQI is not a number
957  *	-ENODEV if the APQI exceeds the maximum value configured for the system
958  */
959 static ssize_t unassign_domain_store(struct device *dev,
960 				     struct device_attribute *attr,
961 				     const char *buf, size_t count)
962 {
963 	int ret;
964 	unsigned long apqi;
965 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
966 
967 	mutex_lock(&matrix_dev->lock);
968 
969 	/* If the KVM guest is running, disallow unassignment of domain */
970 	if (matrix_mdev->kvm) {
971 		ret = -EBUSY;
972 		goto done;
973 	}
974 
975 	ret = kstrtoul(buf, 0, &apqi);
976 	if (ret)
977 		goto done;
978 
979 	if (apqi > matrix_mdev->matrix.aqm_max) {
980 		ret = -ENODEV;
981 		goto done;
982 	}
983 
984 	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
985 	ret = count;
986 
987 done:
988 	mutex_unlock(&matrix_dev->lock);
989 	return ret;
990 }
991 static DEVICE_ATTR_WO(unassign_domain);
992 
993 /**
994  * assign_control_domain_store - parses the domain ID from @buf and sets
995  * the corresponding bit in the mediated matrix device's ADM
996  *
997  * @dev:	the matrix device
998  * @attr:	the mediated matrix device's assign_control_domain attribute
999  * @buf:	a buffer containing the domain ID to be assigned
1000  * @count:	the number of bytes in @buf
1001  *
1002  * Return: the number of bytes processed if the domain ID is valid; otherwise,
1003  * returns one of the following errors:
1004  *	-EINVAL if the ID is not a number
1005  *	-ENODEV if the ID exceeds the maximum value configured for the system
1006  */
1007 static ssize_t assign_control_domain_store(struct device *dev,
1008 					   struct device_attribute *attr,
1009 					   const char *buf, size_t count)
1010 {
1011 	int ret;
1012 	unsigned long id;
1013 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1014 
1015 	mutex_lock(&matrix_dev->lock);
1016 
1017 	/* If the KVM guest is running, disallow assignment of control domain */
1018 	if (matrix_mdev->kvm) {
1019 		ret = -EBUSY;
1020 		goto done;
1021 	}
1022 
1023 	ret = kstrtoul(buf, 0, &id);
1024 	if (ret)
1025 		goto done;
1026 
1027 	if (id > matrix_mdev->matrix.adm_max) {
1028 		ret = -ENODEV;
1029 		goto done;
1030 	}
1031 
1032 	/* Set the bit in the ADM (bitmask) corresponding to the AP control
1033 	 * domain number (id). The bits in the mask, from most significant to
1034 	 * least significant, correspond to IDs 0 up to the one less than the
1035 	 * number of control domains that can be assigned.
1036 	 */
1037 	set_bit_inv(id, matrix_mdev->matrix.adm);
1038 	ret = count;
1039 done:
1040 	mutex_unlock(&matrix_dev->lock);
1041 	return ret;
1042 }
1043 static DEVICE_ATTR_WO(assign_control_domain);
1044 
1045 /**
1046  * unassign_control_domain_store - parses the domain ID from @buf and
1047  * clears the corresponding bit in the mediated matrix device's ADM
1048  *
1049  * @dev:	the matrix device
1050  * @attr:	the mediated matrix device's unassign_control_domain attribute
1051  * @buf:	a buffer containing the domain ID to be unassigned
1052  * @count:	the number of bytes in @buf
1053  *
1054  * Return: the number of bytes processed if the domain ID is valid; otherwise,
1055  * returns one of the following errors:
1056  *	-EINVAL if the ID is not a number
1057  *	-ENODEV if the ID exceeds the maximum value configured for the system
1058  */
1059 static ssize_t unassign_control_domain_store(struct device *dev,
1060 					     struct device_attribute *attr,
1061 					     const char *buf, size_t count)
1062 {
1063 	int ret;
1064 	unsigned long domid;
1065 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1066 	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
1067 
1068 	mutex_lock(&matrix_dev->lock);
1069 
1070 	/* If a KVM guest is running, disallow unassignment of control domain */
1071 	if (matrix_mdev->kvm) {
1072 		ret = -EBUSY;
1073 		goto done;
1074 	}
1075 
1076 	ret = kstrtoul(buf, 0, &domid);
1077 	if (ret)
1078 		goto done;
1079 	if (domid > max_domid) {
1080 		ret = -ENODEV;
1081 		goto done;
1082 	}
1083 
1084 	clear_bit_inv(domid, matrix_mdev->matrix.adm);
1085 	ret = count;
1086 done:
1087 	mutex_unlock(&matrix_dev->lock);
1088 	return ret;
1089 }
1090 static DEVICE_ATTR_WO(unassign_control_domain);
1091 
1092 static ssize_t control_domains_show(struct device *dev,
1093 				    struct device_attribute *dev_attr,
1094 				    char *buf)
1095 {
1096 	unsigned long id;
1097 	int nchars = 0;
1098 	int n;
1099 	char *bufpos = buf;
1100 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1101 	unsigned long max_domid = matrix_mdev->matrix.adm_max;
1102 
1103 	mutex_lock(&matrix_dev->lock);
1104 	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
1105 		n = sprintf(bufpos, "%04lx\n", id);
1106 		bufpos += n;
1107 		nchars += n;
1108 	}
1109 	mutex_unlock(&matrix_dev->lock);
1110 
1111 	return nchars;
1112 }
1113 static DEVICE_ATTR_RO(control_domains);
1114 
1115 static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
1116 			   char *buf)
1117 {
1118 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1119 	char *bufpos = buf;
1120 	unsigned long apid;
1121 	unsigned long apqi;
1122 	unsigned long apid1;
1123 	unsigned long apqi1;
1124 	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
1125 	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
1126 	int nchars = 0;
1127 	int n;
1128 
1129 	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1130 	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1131 
1132 	mutex_lock(&matrix_dev->lock);
1133 
1134 	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1135 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1136 			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1137 					     naqm_bits) {
1138 				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1139 					    apqi);
1140 				bufpos += n;
1141 				nchars += n;
1142 			}
1143 		}
1144 	} else if (apid1 < napm_bits) {
1145 		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1146 			n = sprintf(bufpos, "%02lx.\n", apid);
1147 			bufpos += n;
1148 			nchars += n;
1149 		}
1150 	} else if (apqi1 < naqm_bits) {
1151 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1152 			n = sprintf(bufpos, ".%04lx\n", apqi);
1153 			bufpos += n;
1154 			nchars += n;
1155 		}
1156 	}
1157 
1158 	mutex_unlock(&matrix_dev->lock);
1159 
1160 	return nchars;
1161 }
1162 static DEVICE_ATTR_RO(matrix);
1163 
/*
 * sysfs attributes exposed by a mediated matrix device: the write-only
 * assign/unassign controls for adapters, domains and control domains,
 * followed by the read-only control_domains and matrix listings. The array
 * is NULL-terminated.
 */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};
1175 
1176 static struct attribute_group vfio_ap_mdev_attr_group = {
1177 	.attrs = vfio_ap_mdev_attrs
1178 };
1179 
/*
 * NULL-terminated list of attribute groups for a mediated matrix device;
 * installed as the dev_groups of vfio_ap_matrix_driver.
 */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1184 
1185 /**
1186  * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1187  * to manage AP resources for the guest whose state is represented by @kvm
1188  *
1189  * @matrix_mdev: a mediated matrix device
1190  * @kvm: reference to KVM instance
1191  *
1192  * Return: 0 if no other mediated matrix device has a reference to @kvm;
1193  * otherwise, returns an -EPERM.
1194  */
1195 static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1196 				struct kvm *kvm)
1197 {
1198 	struct ap_matrix_mdev *m;
1199 
1200 	if (kvm->arch.crypto.crycbd) {
1201 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1202 		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1203 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1204 
1205 		mutex_lock(&kvm->lock);
1206 		mutex_lock(&matrix_dev->lock);
1207 
1208 		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1209 			if (m != matrix_mdev && m->kvm == kvm) {
1210 				mutex_unlock(&kvm->lock);
1211 				mutex_unlock(&matrix_dev->lock);
1212 				return -EPERM;
1213 			}
1214 		}
1215 
1216 		kvm_get_kvm(kvm);
1217 		matrix_mdev->kvm = kvm;
1218 		kvm_arch_crypto_set_masks(kvm,
1219 					  matrix_mdev->matrix.apm,
1220 					  matrix_mdev->matrix.aqm,
1221 					  matrix_mdev->matrix.adm);
1222 
1223 		mutex_unlock(&kvm->lock);
1224 		mutex_unlock(&matrix_dev->lock);
1225 	}
1226 
1227 	return 0;
1228 }
1229 
1230 /**
1231  * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
1232  *
1233  * @nb: The notifier block
1234  * @action: Action to be taken
1235  * @data: data associated with the request
1236  *
1237  * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1238  * pinned before). Other requests are ignored.
1239  *
1240  * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
1241  */
1242 static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1243 				       unsigned long action, void *data)
1244 {
1245 	struct ap_matrix_mdev *matrix_mdev;
1246 
1247 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1248 
1249 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1250 		struct vfio_iommu_type1_dma_unmap *unmap = data;
1251 		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1252 
1253 		vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1254 		return NOTIFY_OK;
1255 	}
1256 
1257 	return NOTIFY_DONE;
1258 }
1259 
1260 /**
1261  * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1262  * by @matrix_mdev.
1263  *
1264  * @matrix_mdev: a matrix mediated device
1265  */
1266 static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
1267 {
1268 	struct kvm *kvm = matrix_mdev->kvm;
1269 
1270 	if (kvm && kvm->arch.crypto.crycbd) {
1271 		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1272 		kvm->arch.crypto.pqap_hook = NULL;
1273 		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1274 
1275 		mutex_lock(&kvm->lock);
1276 		mutex_lock(&matrix_dev->lock);
1277 
1278 		kvm_arch_crypto_clear_masks(kvm);
1279 		vfio_ap_mdev_reset_queues(matrix_mdev);
1280 		kvm_put_kvm(kvm);
1281 		matrix_mdev->kvm = NULL;
1282 
1283 		mutex_unlock(&kvm->lock);
1284 		mutex_unlock(&matrix_dev->lock);
1285 	}
1286 }
1287 
1288 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
1289 				       unsigned long action, void *data)
1290 {
1291 	int notify_rc = NOTIFY_OK;
1292 	struct ap_matrix_mdev *matrix_mdev;
1293 
1294 	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
1295 		return NOTIFY_OK;
1296 
1297 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
1298 
1299 	if (!data)
1300 		vfio_ap_mdev_unset_kvm(matrix_mdev);
1301 	else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
1302 		notify_rc = NOTIFY_DONE;
1303 
1304 	return notify_rc;
1305 }
1306 
1307 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1308 {
1309 	struct device *dev;
1310 	struct vfio_ap_queue *q = NULL;
1311 
1312 	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1313 				 &apqn, match_apqn);
1314 	if (dev) {
1315 		q = dev_get_drvdata(dev);
1316 		put_device(dev);
1317 	}
1318 
1319 	return q;
1320 }
1321 
1322 int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1323 			     unsigned int retry)
1324 {
1325 	struct ap_queue_status status;
1326 	int ret;
1327 	int retry2 = 2;
1328 
1329 	if (!q)
1330 		return 0;
1331 
1332 retry_zapq:
1333 	status = ap_zapq(q->apqn);
1334 	switch (status.response_code) {
1335 	case AP_RESPONSE_NORMAL:
1336 		ret = 0;
1337 		break;
1338 	case AP_RESPONSE_RESET_IN_PROGRESS:
1339 		if (retry--) {
1340 			msleep(20);
1341 			goto retry_zapq;
1342 		}
1343 		ret = -EBUSY;
1344 		break;
1345 	case AP_RESPONSE_Q_NOT_AVAIL:
1346 	case AP_RESPONSE_DECONFIGURED:
1347 	case AP_RESPONSE_CHECKSTOPPED:
1348 		WARN_ON_ONCE(status.irq_enabled);
1349 		ret = -EBUSY;
1350 		goto free_resources;
1351 	default:
1352 		/* things are really broken, give up */
1353 		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1354 		     status.response_code);
1355 		return -EIO;
1356 	}
1357 
1358 	/* wait for the reset to take effect */
1359 	while (retry2--) {
1360 		if (status.queue_empty && !status.irq_enabled)
1361 			break;
1362 		msleep(20);
1363 		status = ap_tapq(q->apqn, NULL);
1364 	}
1365 	WARN_ON_ONCE(retry2 <= 0);
1366 
1367 free_resources:
1368 	vfio_ap_free_aqic_resources(q);
1369 
1370 	return ret;
1371 }
1372 
1373 static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1374 {
1375 	int ret;
1376 	int rc = 0;
1377 	unsigned long apid, apqi;
1378 	struct vfio_ap_queue *q;
1379 
1380 	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1381 			     matrix_mdev->matrix.apm_max + 1) {
1382 		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1383 				     matrix_mdev->matrix.aqm_max + 1) {
1384 			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1385 			ret = vfio_ap_mdev_reset_queue(q, 1);
1386 			/*
1387 			 * Regardless whether a queue turns out to be busy, or
1388 			 * is not operational, we need to continue resetting
1389 			 * the remaining queues.
1390 			 */
1391 			if (ret)
1392 				rc = ret;
1393 		}
1394 	}
1395 
1396 	return rc;
1397 }
1398 
1399 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1400 {
1401 	struct ap_matrix_mdev *matrix_mdev =
1402 		container_of(vdev, struct ap_matrix_mdev, vdev);
1403 	unsigned long events;
1404 	int ret;
1405 
1406 	matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
1407 	events = VFIO_GROUP_NOTIFY_SET_KVM;
1408 
1409 	ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1410 				     &events, &matrix_mdev->group_notifier);
1411 	if (ret)
1412 		return ret;
1413 
1414 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1415 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1416 	ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1417 				     &events, &matrix_mdev->iommu_notifier);
1418 	if (ret)
1419 		goto out_unregister_group;
1420 	return 0;
1421 
1422 out_unregister_group:
1423 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1424 				 &matrix_mdev->group_notifier);
1425 	return ret;
1426 }
1427 
1428 static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
1429 {
1430 	struct ap_matrix_mdev *matrix_mdev =
1431 		container_of(vdev, struct ap_matrix_mdev, vdev);
1432 
1433 	vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY,
1434 				 &matrix_mdev->iommu_notifier);
1435 	vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY,
1436 				 &matrix_mdev->group_notifier);
1437 	vfio_ap_mdev_unset_kvm(matrix_mdev);
1438 }
1439 
1440 static int vfio_ap_mdev_get_device_info(unsigned long arg)
1441 {
1442 	unsigned long minsz;
1443 	struct vfio_device_info info;
1444 
1445 	minsz = offsetofend(struct vfio_device_info, num_irqs);
1446 
1447 	if (copy_from_user(&info, (void __user *)arg, minsz))
1448 		return -EFAULT;
1449 
1450 	if (info.argsz < minsz)
1451 		return -EINVAL;
1452 
1453 	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1454 	info.num_regions = 0;
1455 	info.num_irqs = 0;
1456 
1457 	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1458 }
1459 
1460 static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1461 				    unsigned int cmd, unsigned long arg)
1462 {
1463 	struct ap_matrix_mdev *matrix_mdev =
1464 		container_of(vdev, struct ap_matrix_mdev, vdev);
1465 	int ret;
1466 
1467 	mutex_lock(&matrix_dev->lock);
1468 	switch (cmd) {
1469 	case VFIO_DEVICE_GET_INFO:
1470 		ret = vfio_ap_mdev_get_device_info(arg);
1471 		break;
1472 	case VFIO_DEVICE_RESET:
1473 		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1474 		break;
1475 	default:
1476 		ret = -EOPNOTSUPP;
1477 		break;
1478 	}
1479 	mutex_unlock(&matrix_dev->lock);
1480 
1481 	return ret;
1482 }
1483 
/*
 * VFIO device callbacks of a mediated matrix device: open and close manage
 * the group/IOMMU notifiers and the KVM association, ioctl serves
 * VFIO_DEVICE_GET_INFO and VFIO_DEVICE_RESET.
 */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};
1489 
/*
 * The mdev driver for mediated matrix devices. The sysfs attributes in
 * vfio_ap_mdev_attr_groups are attached to each device through dev_groups;
 * probe and remove are implemented elsewhere in this file.
 */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
};
1500 
/*
 * Parent operations passed to mdev_register_device() in
 * vfio_ap_mdev_register(): they name the mdev driver to use and the
 * supported mdev type groups (defined elsewhere in this file).
 */
static const struct mdev_parent_ops vfio_ap_matrix_ops = {
	.owner			= THIS_MODULE,
	.device_driver		= &vfio_ap_matrix_driver,
	.supported_type_groups	= vfio_ap_mdev_type_groups,
};
1506 
1507 int vfio_ap_mdev_register(void)
1508 {
1509 	int ret;
1510 
1511 	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1512 
1513 	ret = mdev_register_driver(&vfio_ap_matrix_driver);
1514 	if (ret)
1515 		return ret;
1516 
1517 	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
1518 	if (ret)
1519 		goto err_driver;
1520 	return 0;
1521 
1522 err_driver:
1523 	mdev_unregister_driver(&vfio_ap_matrix_driver);
1524 	return ret;
1525 }
1526 
/**
 * vfio_ap_mdev_unregister - undo vfio_ap_mdev_register()
 *
 * Unregisters the matrix device as an mdev parent and then unregisters the
 * mdev driver, i.e. in the reverse order of their registration.
 */
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}
1532