xref: /linux/drivers/iommu/io-pgfault.c (revision 13c6bba601ac2928e330e14e178c7ebfabb19392)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Handle device page faults
4  *
5  * Copyright (C) 2020 ARM Ltd.
6  */
7 
8 #include <linux/iommu.h>
9 #include <linux/list.h>
10 #include <linux/sched/mm.h>
11 #include <linux/slab.h>
12 #include <linux/workqueue.h>
13 
14 #include "iommu-priv.h"
15 
16 /*
17  * Return the fault parameter of a device if it exists. Otherwise, return NULL.
18  * On a successful return, the caller takes a reference of this parameter and
19  * should put it after use by calling iopf_put_dev_fault_param().
20  */
21 static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
22 {
23 	struct dev_iommu *param = dev->iommu;
24 	struct iommu_fault_param *fault_param;
25 
26 	rcu_read_lock();
27 	fault_param = rcu_dereference(param->fault_param);
28 	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
29 		fault_param = NULL;
30 	rcu_read_unlock();
31 
32 	return fault_param;
33 }
34 
35 /* Caller must hold a reference of the fault parameter. */
36 static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
37 {
38 	if (refcount_dec_and_test(&fault_param->users))
39 		kfree_rcu(fault_param, rcu);
40 }
41 
42 static void __iopf_free_group(struct iopf_group *group)
43 {
44 	struct iopf_fault *iopf, *next;
45 
46 	list_for_each_entry_safe(iopf, next, &group->faults, list) {
47 		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
48 			kfree(iopf);
49 	}
50 
51 	/* Pair with iommu_report_device_fault(). */
52 	iopf_put_dev_fault_param(group->fault_param);
53 }
54 
55 void iopf_free_group(struct iopf_group *group)
56 {
57 	__iopf_free_group(group);
58 	kfree(group);
59 }
60 EXPORT_SYMBOL_GPL(iopf_free_group);
61 
62 /* Non-last request of a group. Postpone until the last one. */
63 static int report_partial_fault(struct iommu_fault_param *fault_param,
64 				struct iommu_fault *fault)
65 {
66 	struct iopf_fault *iopf;
67 
68 	iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
69 	if (!iopf)
70 		return -ENOMEM;
71 
72 	iopf->fault = *fault;
73 
74 	mutex_lock(&fault_param->lock);
75 	list_add(&iopf->list, &fault_param->partial);
76 	mutex_unlock(&fault_param->lock);
77 
78 	return 0;
79 }
80 
81 static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
82 					   struct iopf_fault *evt,
83 					   struct iopf_group *abort_group)
84 {
85 	struct iopf_fault *iopf, *next;
86 	struct iopf_group *group;
87 
88 	group = kzalloc(sizeof(*group), GFP_KERNEL);
89 	if (!group) {
90 		/*
91 		 * We always need to construct the group as we need it to abort
92 		 * the request at the driver if it can't be handled.
93 		 */
94 		group = abort_group;
95 	}
96 
97 	group->fault_param = iopf_param;
98 	group->last_fault.fault = evt->fault;
99 	INIT_LIST_HEAD(&group->faults);
100 	INIT_LIST_HEAD(&group->pending_node);
101 	list_add(&group->last_fault.list, &group->faults);
102 
103 	/* See if we have partial faults for this group */
104 	mutex_lock(&iopf_param->lock);
105 	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
106 		if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
107 			/* Insert *before* the last fault */
108 			list_move(&iopf->list, &group->faults);
109 	}
110 	list_add(&group->pending_node, &iopf_param->faults);
111 	mutex_unlock(&iopf_param->lock);
112 
113 	group->fault_count = list_count_nodes(&group->faults);
114 
115 	return group;
116 }
117 
118 static struct iommu_attach_handle *find_fault_handler(struct device *dev,
119 						     struct iopf_fault *evt)
120 {
121 	struct iommu_fault *fault = &evt->fault;
122 	struct iommu_attach_handle *attach_handle;
123 
124 	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
125 		attach_handle = iommu_attach_handle_get(dev->iommu_group,
126 				fault->prm.pasid, 0);
127 		if (IS_ERR(attach_handle)) {
128 			const struct iommu_ops *ops = dev_iommu_ops(dev);
129 
130 			if (!ops->user_pasid_table)
131 				return NULL;
132 			/*
133 			 * The iommu driver for this device supports user-
134 			 * managed PASID table. Therefore page faults for
135 			 * any PASID should go through the NESTING domain
136 			 * attached to the device RID.
137 			 */
138 			attach_handle = iommu_attach_handle_get(
139 					dev->iommu_group, IOMMU_NO_PASID,
140 					IOMMU_DOMAIN_NESTED);
141 			if (IS_ERR(attach_handle))
142 				return NULL;
143 		}
144 	} else {
145 		attach_handle = iommu_attach_handle_get(dev->iommu_group,
146 				IOMMU_NO_PASID, 0);
147 
148 		if (IS_ERR(attach_handle))
149 			return NULL;
150 	}
151 
152 	if (!attach_handle->domain->iopf_handler)
153 		return NULL;
154 
155 	return attach_handle;
156 }
157 
158 static void iopf_error_response(struct device *dev, struct iopf_fault *evt)
159 {
160 	const struct iommu_ops *ops = dev_iommu_ops(dev);
161 	struct iommu_fault *fault = &evt->fault;
162 	struct iommu_page_response resp = {
163 		.pasid = fault->prm.pasid,
164 		.grpid = fault->prm.grpid,
165 		.code = IOMMU_PAGE_RESP_INVALID
166 	};
167 
168 	ops->page_response(dev, evt, &resp);
169 }
170 
171 /**
172  * iommu_report_device_fault() - Report fault event to device driver
173  * @dev: the device
174  * @evt: fault event data
175  *
176  * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
177  * handler. If this function fails then ops->page_response() was called to
178  * complete evt if required.
179  *
180  * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
181  * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
182  * expect a response. It may be generated when disabling a PASID (issuing a
183  * PASID stop request) by some PCI devices.
184  *
185  * The PASID stop request is issued by the device driver before unbind(). Once
186  * it completes, no page request is generated for this PASID anymore and
187  * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
188  * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
189  * for all outstanding page requests to come back with a response before
190  * completing the PASID stop request. Others do not wait for page responses, and
191  * instead issue this Stop Marker that tells us when the PASID can be
192  * reallocated.
193  *
194  * It is safe to discard the Stop Marker because it is an optimization.
195  * a. Page requests, which are posted requests, have been flushed to the IOMMU
196  *    when the stop request completes.
197  * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
198  *    PASID.
199  *
200  * So even though the Stop Marker might be issued by the device *after* the stop
201  * request completes, outstanding faults will have been dealt with by the time
202  * the PASID is freed.
203  *
204  * Any valid page fault will be eventually routed to an iommu domain and the
205  * page fault handler installed there will get called. The users of this
206  * handling framework should guarantee that the iommu domain could only be
207  * freed after the device has stopped generating page faults (or the iommu
208  * hardware has been set to block the page faults) and the pending page faults
209  * have been flushed. If no page fault handler is attached or no iopf param is
210  * set up, ops->page_response() is called to complete the evt.
211  *
212  * Returns 0 on success, or an error in case of a bad/failed iopf setup.
213  */
214 int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
215 {
216 	struct iommu_attach_handle *attach_handle;
217 	struct iommu_fault *fault = &evt->fault;
218 	struct iommu_fault_param *iopf_param;
219 	struct iopf_group abort_group = {};
220 	struct iopf_group *group;
221 
222 	attach_handle = find_fault_handler(dev, evt);
223 	if (!attach_handle)
224 		goto err_bad_iopf;
225 
226 	/*
227 	 * Something has gone wrong if a fault-capable domain is attached but no
228 	 * iopf_param is set up
229 	 */
230 	iopf_param = iopf_get_dev_fault_param(dev);
231 	if (WARN_ON(!iopf_param))
232 		goto err_bad_iopf;
233 
234 	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
235 		int ret;
236 
237 		ret = report_partial_fault(iopf_param, fault);
238 		iopf_put_dev_fault_param(iopf_param);
239 		/* A request that is not the last does not need to be ack'd */
240 
241 		return ret;
242 	}
243 
244 	/*
245 	 * This is the last page fault of a group. Allocate an iopf group and
246 	 * pass it to the domain's page fault handler. The group holds a reference
247 	 * to the fault parameter, which is released on the response path or the
248 	 * error path of this function. If an error is returned, the caller
249 	 * will send a response to the hardware. We need to clean up before
250 	 * leaving, otherwise partial faults will be stuck.
251 	 */
252 	group = iopf_group_alloc(iopf_param, evt, &abort_group);
253 	if (group == &abort_group)
254 		goto err_abort;
255 
256 	group->attach_handle = attach_handle;
257 
258 	/*
259 	 * On success iopf_handler must call iopf_group_response() and
260 	 * iopf_free_group()
261 	 */
262 	if (group->attach_handle->domain->iopf_handler(group))
263 		goto err_abort;
264 
265 	return 0;
266 
267 err_abort:
268 	dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
269 			     fault->prm.pasid);
270 	iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
271 	if (group == &abort_group)
272 		__iopf_free_group(group);
273 	else
274 		iopf_free_group(group);
275 
276 	return 0;
277 
278 err_bad_iopf:
279 	if (fault->type == IOMMU_FAULT_PAGE_REQ)
280 		iopf_error_response(dev, evt);
281 
282 	return -EINVAL;
283 }
284 EXPORT_SYMBOL_GPL(iommu_report_device_fault);
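
/*
 * Illustrative usage sketch (not compiled): how an IOMMU driver's PRQ handling
 * path might report one decoded page request through
 * iommu_report_device_fault(). struct my_prq_entry and its fields are
 * hypothetical stand-ins for hardware-specific decoding; only the
 * iopf_fault/iommu_fault fields already used in this file are assumed.
 */
#if 0
static void my_iommu_report_prq_entry(struct device *dev,
				      const struct my_prq_entry *ent)
{
	struct iopf_fault evt = {
		.fault = {
			.type = IOMMU_FAULT_PAGE_REQ,
			.prm = {
				.flags = IOMMU_FAULT_PAGE_REQUEST_PASID_VALID |
					 (ent->last ? IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE : 0),
				.pasid = ent->pasid,
				.grpid = ent->grpid,
				.perm = ent->perm,
				.addr = ent->addr,
			},
		},
	};

	/*
	 * On failure the framework has already completed the request with an
	 * error response where one was required (err_bad_iopf above), so the
	 * driver only needs to note it.
	 */
	if (iommu_report_device_fault(dev, &evt))
		dev_dbg(dev, "page request for pasid %u not tracked\n",
			ent->pasid);
}
#endif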
285 
286 /**
287  * iopf_queue_flush_dev - Ensure that all queued faults have been processed
288  * @dev: the endpoint whose faults need to be flushed.
289  *
290  * The IOMMU driver calls this before releasing a PASID, to ensure that all
291  * pending faults for this PASID have been handled, and won't hit the address
292  * space of the next process that uses this PASID. The driver must make sure
293  * that no new fault is added to the queue. In particular it must flush its
294  * low-level queue before calling this function.
295  *
296  * Return: 0 on success and <0 on error.
297  */
298 int iopf_queue_flush_dev(struct device *dev)
299 {
300 	struct iommu_fault_param *iopf_param;
301 
302 	/*
303 	 * It's a driver bug to be here after iopf_queue_remove_device().
304 	 * Therefore, it's safe to dereference the fault parameter without
305 	 * holding the lock.
306 	 */
307 	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
308 	if (WARN_ON(!iopf_param))
309 		return -ENODEV;
310 
311 	flush_workqueue(iopf_param->queue->wq);
312 
313 	return 0;
314 }
315 EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
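
/*
 * Illustrative usage sketch (not compiled): the ordering described above for
 * tearing down a PASID. The my_iommu_* helpers are hypothetical placeholders
 * for hardware-specific steps, not real APIs.
 */
#if 0
static void my_iommu_quiesce_pasid_faults(struct my_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	/* 1. Stop the hardware from accepting new requests for this PASID. */
	my_iommu_block_pasid(iommu, dev, pasid);

	/* 2. Drain the low-level PRQ so everything pending gets reported. */
	my_iommu_drain_prq(iommu);

	/* 3. Wait until all reported faults for this device are handled. */
	iopf_queue_flush_dev(dev);

	/* Only now is it safe to free or reuse the PASID. */
}
#endif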
316 
317 /**
318  * iopf_group_response - Respond to a group of page faults
319  * @group: the group of faults with the same group id
320  * @status: the response code
321  */
322 void iopf_group_response(struct iopf_group *group,
323 			 enum iommu_page_response_code status)
324 {
325 	struct iommu_fault_param *fault_param = group->fault_param;
326 	struct iopf_fault *iopf = &group->last_fault;
327 	struct device *dev = group->fault_param->dev;
328 	const struct iommu_ops *ops = dev_iommu_ops(dev);
329 	struct iommu_page_response resp = {
330 		.pasid = iopf->fault.prm.pasid,
331 		.grpid = iopf->fault.prm.grpid,
332 		.code = status,
333 	};
334 
335 	/* Only send response if there is a fault report pending */
336 	mutex_lock(&fault_param->lock);
337 	if (!list_empty(&group->pending_node)) {
338 		ops->page_response(dev, &group->last_fault, &resp);
339 		list_del_init(&group->pending_node);
340 	}
341 	mutex_unlock(&fault_param->lock);
342 }
343 EXPORT_SYMBOL_GPL(iopf_group_response);
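
/*
 * Illustrative usage sketch (not compiled): a minimal domain->iopf_handler
 * that resolves every fault in the group and then completes it, following the
 * rule noted in iommu_report_device_fault() that a successful handler must
 * call iopf_group_response() and iopf_free_group(). my_resolve_fault() is a
 * hypothetical helper; real handlers (e.g. the SVA handler) typically defer
 * this work to the iopf workqueue instead of running it in the reporting
 * context.
 */
#if 0
static int my_domain_iopf_handler(struct iopf_group *group)
{
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
	struct iopf_fault *iopf;

	list_for_each_entry(iopf, &group->faults, list) {
		if (my_resolve_fault(group->attach_handle, &iopf->fault)) {
			status = IOMMU_PAGE_RESP_INVALID;
			break;
		}
	}

	iopf_group_response(group, status);
	iopf_free_group(group);

	return 0;
}
#endif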
344 
345 /**
346  * iopf_queue_discard_partial - Remove all pending partial faults
347  * @queue: the queue whose partial faults need to be discarded
348  *
349  * When the hardware queue overflows, the last page faults of a group may have
350  * been lost and the IOMMU driver calls this to discard all partial faults. The
351  * driver shouldn't be adding new faults to this queue concurrently.
352  *
353  * Return: 0 on success and <0 on error.
354  */
355 int iopf_queue_discard_partial(struct iopf_queue *queue)
356 {
357 	struct iopf_fault *iopf, *next;
358 	struct iommu_fault_param *iopf_param;
359 
360 	if (!queue)
361 		return -EINVAL;
362 
363 	mutex_lock(&queue->lock);
364 	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
365 		mutex_lock(&iopf_param->lock);
366 		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
367 					 list) {
368 			list_del(&iopf->list);
369 			kfree(iopf);
370 		}
371 		mutex_unlock(&iopf_param->lock);
372 	}
373 	mutex_unlock(&queue->lock);
374 	return 0;
375 }
376 EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
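
/*
 * Illustrative usage sketch (not compiled): reacting to a PRQ overflow, where
 * the "last" requests that would have completed their groups may have been
 * dropped. my_iommu_reset_prq() and the iopf_queue field are hypothetical.
 */
#if 0
static void my_iommu_handle_prq_overflow(struct my_iommu *iommu)
{
	/* Reset/drain the hardware queue so no new faults are being added. */
	my_iommu_reset_prq(iommu);

	/* Then drop partial faults that can never be completed. */
	iopf_queue_discard_partial(iommu->iopf_queue);
}
#endif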
377 
378 /**
379  * iopf_queue_add_device - Add producer to the fault queue
380  * @queue: IOPF queue
381  * @dev: device to add
382  *
383  * Return: 0 on success and <0 on error.
384  */
385 int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
386 {
387 	int ret = 0;
388 	struct dev_iommu *param = dev->iommu;
389 	struct iommu_fault_param *fault_param;
390 	const struct iommu_ops *ops = dev_iommu_ops(dev);
391 
392 	if (!ops->page_response)
393 		return -ENODEV;
394 
395 	mutex_lock(&queue->lock);
396 	mutex_lock(&param->lock);
397 	if (rcu_dereference_check(param->fault_param,
398 				  lockdep_is_held(&param->lock))) {
399 		ret = -EBUSY;
400 		goto done_unlock;
401 	}
402 
403 	fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
404 	if (!fault_param) {
405 		ret = -ENOMEM;
406 		goto done_unlock;
407 	}
408 
409 	mutex_init(&fault_param->lock);
410 	INIT_LIST_HEAD(&fault_param->faults);
411 	INIT_LIST_HEAD(&fault_param->partial);
412 	fault_param->dev = dev;
413 	refcount_set(&fault_param->users, 1);
414 	list_add(&fault_param->queue_list, &queue->devices);
415 	fault_param->queue = queue;
416 
417 	rcu_assign_pointer(param->fault_param, fault_param);
418 
419 done_unlock:
420 	mutex_unlock(&param->lock);
421 	mutex_unlock(&queue->lock);
422 
423 	return ret;
424 }
425 EXPORT_SYMBOL_GPL(iopf_queue_add_device);
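
/*
 * Illustrative usage sketch (not compiled): typical setup when enabling I/O
 * page faults for a device. The queue is usually created once per IOMMU
 * instance; the my_iommu structure and its fields are hypothetical.
 */
#if 0
static int my_iommu_enable_iopf(struct my_iommu *iommu, struct device *dev)
{
	if (!iommu->iopf_queue) {
		iommu->iopf_queue = iopf_queue_alloc(dev_name(iommu->dev));
		if (!iommu->iopf_queue)
			return -ENOMEM;
	}

	/* Returns -EBUSY if the device has already been added. */
	return iopf_queue_add_device(iommu->iopf_queue, dev);
}
#endif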
426 
427 /**
428  * iopf_queue_remove_device - Remove producer from fault queue
429  * @queue: IOPF queue
430  * @dev: device to remove
431  *
432  * Remove a device from an iopf_queue. It's recommended to follow these
433  * steps when removing a device (see the sketch after this function):
434  *
435  * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
436  *   and flush any hardware page request queues. This should be done before
437  *   calling into this helper.
438  * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
439  *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
440  *   not retry. This helper function handles this.
441  * - Disable PRI on the device: After calling this helper, the caller could
442  *   then disable PRI on the device.
443  *
444  * Calling iopf_queue_remove_device() essentially disassociates the device from the queue.
445  * The fault_param might still exist, but iommu_page_response() will do
446  * nothing. The device fault parameter reference count has been properly
447  * passed from iommu_report_device_fault() to the fault handling work, and
448  * will eventually be released after iommu_page_response().
449  */
450 void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
451 {
452 	struct iopf_fault *partial_iopf;
453 	struct iopf_fault *next;
454 	struct iopf_group *group, *temp;
455 	struct dev_iommu *param = dev->iommu;
456 	struct iommu_fault_param *fault_param;
457 	const struct iommu_ops *ops = dev_iommu_ops(dev);
458 
459 	mutex_lock(&queue->lock);
460 	mutex_lock(&param->lock);
461 	fault_param = rcu_dereference_check(param->fault_param,
462 					    lockdep_is_held(&param->lock));
463 
464 	if (WARN_ON(!fault_param || fault_param->queue != queue))
465 		goto unlock;
466 
467 	mutex_lock(&fault_param->lock);
468 	list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
469 		kfree(partial_iopf);
470 
471 	list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
472 		struct iopf_fault *iopf = &group->last_fault;
473 		struct iommu_page_response resp = {
474 			.pasid = iopf->fault.prm.pasid,
475 			.grpid = iopf->fault.prm.grpid,
476 			.code = IOMMU_PAGE_RESP_INVALID
477 		};
478 
479 		ops->page_response(dev, iopf, &resp);
480 		list_del_init(&group->pending_node);
481 	}
482 	mutex_unlock(&fault_param->lock);
483 
484 	list_del(&fault_param->queue_list);
485 
486 	/* dec the ref owned by iopf_queue_add_device() */
487 	rcu_assign_pointer(param->fault_param, NULL);
488 	iopf_put_dev_fault_param(fault_param);
489 unlock:
490 	mutex_unlock(&param->lock);
491 	mutex_unlock(&queue->lock);
492 }
493 EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
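
/*
 * Illustrative usage sketch (not compiled): the removal sequence listed in the
 * comment above. The my_* helpers are hypothetical placeholders for
 * driver-specific steps; for a PCI device the final step would be
 * pci_disable_pri().
 */
#if 0
static void my_iommu_disable_iopf(struct my_iommu *iommu, struct device *dev)
{
	/* 1. Stop PRI reception in the IOMMU and drain its hardware queue. */
	my_iommu_block_and_drain_prq(iommu, dev);

	/* 2. Respond to outstanding requests and detach from the queue. */
	iopf_queue_remove_device(iommu->iopf_queue, dev);

	/* 3. Only now disable PRI on the device itself. */
	my_device_disable_pri(dev);
}
#endif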
494 
495 /**
496  * iopf_queue_alloc - Allocate and initialize a fault queue
497  * @name: a unique string identifying the queue (for workqueue)
498  *
499  * Return: the queue on success and NULL on error.
500  */
501 struct iopf_queue *iopf_queue_alloc(const char *name)
502 {
503 	struct iopf_queue *queue;
504 
505 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
506 	if (!queue)
507 		return NULL;
508 
509 	/*
510 	 * The WQ is unordered because the low-level handler enqueues faults by
511 	 * group. PRI requests within a group have to be ordered, but once
512 	 * that's dealt with, the high-level function can handle groups out of
513 	 * order.
514 	 */
515 	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
516 	if (!queue->wq) {
517 		kfree(queue);
518 		return NULL;
519 	}
520 
521 	INIT_LIST_HEAD(&queue->devices);
522 	mutex_init(&queue->lock);
523 
524 	return queue;
525 }
526 EXPORT_SYMBOL_GPL(iopf_queue_alloc);
527 
528 /**
529  * iopf_queue_free - Free IOPF queue
530  * @queue: queue to free
531  *
532  * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
533  * adding/removing devices on this queue anymore.
534  */
535 void iopf_queue_free(struct iopf_queue *queue)
536 {
537 	struct iommu_fault_param *iopf_param, *next;
538 
539 	if (!queue)
540 		return;
541 
542 	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
543 		iopf_queue_remove_device(queue, iopf_param->dev);
544 
545 	destroy_workqueue(queue->wq);
546 	kfree(queue);
547 }
548 EXPORT_SYMBOL_GPL(iopf_queue_free);
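
/*
 * Illustrative usage sketch (not compiled): the counterpart teardown in the
 * IOMMU driver's remove path, reusing the hypothetical my_iommu fields from
 * the sketches above.
 */
#if 0
static void my_iommu_teardown_iopf(struct my_iommu *iommu)
{
	/* Devices still on the queue are removed by iopf_queue_free() itself. */
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
}
#endif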
549