xref: /linux/drivers/iommu/iommufd/fault.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024 Intel Corporation
 */
#define pr_fmt(fmt) "iommufd: " fmt

#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/iommufd.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/poll.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "iommufd_private.h"

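/*
 * Enable I/O page fault delivery for @idev. The enable is reference
 * counted under idev->iopf_lock so that only the first caller actually
 * turns on IOMMU_DEV_FEAT_IOPF for the device.
 */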
static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
{
	struct device *dev = idev->dev;
	int ret;

	/*
	 * Once PCI/PRI support is turned on for a VF, a response failure code
	 * should not be forwarded to the hardware because PRI is a resource
	 * shared between the PF and its VFs and there is no coordination for
	 * this shared capability. This waits for a vPRI reset to recover.
	 */
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->is_virtfn && pci_pri_supported(pdev))
			return -EINVAL;
	}

	mutex_lock(&idev->iopf_lock);
	/* Device iopf has already been enabled. */
	if (++idev->iopf_enabled > 1) {
		mutex_unlock(&idev->iopf_lock);
		return 0;
	}

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		--idev->iopf_enabled;
	mutex_unlock(&idev->iopf_lock);

	return ret;
}

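/*
 * Drop one reference on the device's IOPF enable count and disable
 * IOMMU_DEV_FEAT_IOPF when the last user goes away.
 */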
static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
{
	mutex_lock(&idev->iopf_lock);
	if (!WARN_ON(idev->iopf_enabled == 0)) {
		if (--idev->iopf_enabled == 0)
			iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
	}
	mutex_unlock(&idev->iopf_lock);
}

static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev)
{
	struct iommufd_attach_handle *handle;
	int ret;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	handle->idev = idev;
	ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
					&handle->handle);
	if (ret)
		kfree(handle);

	return ret;
}

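/*
 * Attach @idev to a fault-capable @hwpt: IOPF is enabled on the device
 * first, and the attach is done through an iommufd_attach_handle so that
 * delivered faults can later be matched back to the device.
 */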
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev)
{
	int ret;

	if (!hwpt->fault)
		return -EINVAL;

	ret = iommufd_fault_iopf_enable(idev);
	if (ret)
		return ret;

	ret = __fault_domain_attach_dev(hwpt, idev);
	if (ret)
		iommufd_fault_iopf_disable(idev);

	return ret;
}

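/*
 * Respond IOMMU_PAGE_RESP_INVALID to every fault group that was delivered
 * through @handle but never answered by userspace, both those still queued
 * on the deliver list and those already read and waiting in the response
 * xarray. Called when the handle is going away.
 */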
static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
					 struct iommufd_attach_handle *handle)
{
	struct iommufd_fault *fault = hwpt->fault;
	struct iopf_group *group, *next;
	struct list_head free_list;
	unsigned long index;

	if (!fault)
		return;
	INIT_LIST_HEAD(&free_list);

	mutex_lock(&fault->mutex);
	spin_lock(&fault->lock);
	list_for_each_entry_safe(group, next, &fault->deliver, node) {
		if (group->attach_handle != &handle->handle)
			continue;
		list_move(&group->node, &free_list);
	}
	spin_unlock(&fault->lock);

	list_for_each_entry_safe(group, next, &free_list, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}

	xa_for_each(&fault->response, index, group) {
		if (group->attach_handle != &handle->handle)
			continue;
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	mutex_unlock(&fault->mutex);
}

static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device *idev)
{
	struct iommu_attach_handle *handle;

	handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
	if (IS_ERR(handle))
		return NULL;

	return to_iommufd_handle(handle);
}

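/*
 * Detach @idev from a fault-capable @hwpt: any outstanding faults for the
 * device are auto-responded, IOPF is disabled, and the attach handle
 * allocated at attach time is freed.
 */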
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev)
{
	struct iommufd_attach_handle *handle;

	handle = iommufd_device_get_attach_handle(idev);
	iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
	iommufd_auto_response_faults(hwpt, handle);
	iommufd_fault_iopf_disable(idev);
	kfree(handle);
}

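/*
 * Replace the domain attached to @idev. A new attach handle is only needed
 * when the new hwpt is fault capable; faults still pending against the old
 * handle are auto-responded once the replace has succeeded.
 */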
static int __fault_domain_replace_dev(struct iommufd_device *idev,
				      struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle, *curr = NULL;
	int ret;

	if (old->fault)
		curr = iommufd_device_get_attach_handle(idev);

	if (hwpt->fault) {
		handle = kzalloc(sizeof(*handle), GFP_KERNEL);
		if (!handle)
			return -ENOMEM;

		handle->idev = idev;
		ret = iommu_replace_group_handle(idev->igroup->group,
						 hwpt->domain, &handle->handle);
	} else {
		ret = iommu_replace_group_handle(idev->igroup->group,
						 hwpt->domain, NULL);
	}

	if (!ret && curr) {
		iommufd_auto_response_faults(old, curr);
		kfree(curr);
	}

	return ret;
}

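/*
 * Replace @old with @hwpt on @idev, enabling IOPF before the switch when
 * only the new hwpt is fault capable and disabling it afterwards when only
 * the old one was.
 */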
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old)
{
	bool iopf_off = !hwpt->fault && old->fault;
	bool iopf_on = hwpt->fault && !old->fault;
	int ret;

	if (iopf_on) {
		ret = iommufd_fault_iopf_enable(idev);
		if (ret)
			return ret;
	}

	ret = __fault_domain_replace_dev(idev, hwpt, old);
	if (ret) {
		if (iopf_on)
			iommufd_fault_iopf_disable(idev);
		return ret;
	}

	if (iopf_off)
		iommufd_fault_iopf_disable(idev);

	return 0;
}

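/*
 * Object destroy callback for IOMMUFD_OBJ_FAULT: respond to and free all
 * fault groups that userspace never answered, then tear down the response
 * xarray and the mutex.
 */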
void iommufd_fault_destroy(struct iommufd_object *obj)
{
	struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
	struct iopf_group *group, *next;
	unsigned long index;

	/*
	 * The iommufd object's reference count is zero at this point.
	 * We can be confident that no other threads are currently
	 * accessing this pointer. Therefore, acquiring the mutex here
	 * is unnecessary.
	 */
	list_for_each_entry_safe(group, next, &fault->deliver, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_for_each(&fault->response, index, group) {
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_destroy(&fault->response);
	mutex_destroy(&fault->mutex);
}

static void iommufd_compose_fault_message(struct iommu_fault *fault,
					  struct iommu_hwpt_pgfault *hwpt_fault,
					  struct iommufd_device *idev,
					  u32 cookie)
{
	hwpt_fault->flags = fault->prm.flags;
	hwpt_fault->dev_id = idev->obj.id;
	hwpt_fault->pasid = fault->prm.pasid;
	hwpt_fault->grpid = fault->prm.grpid;
	hwpt_fault->perm = fault->prm.perm;
	hwpt_fault->addr = fault->prm.addr;
	hwpt_fault->length = 0;
	hwpt_fault->cookie = cookie;
}

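/*
 * read() on the fault fd returns an array of struct iommu_hwpt_pgfault
 * records, one per fault in each delivered group. A group is only consumed
 * when all of its faults fit in the user buffer; its cookie is stashed in
 * the response xarray so a later write() can match the group again.
 */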
static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
				       size_t count, loff_t *ppos)
{
	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
	struct iommufd_fault *fault = filep->private_data;
	struct iommu_hwpt_pgfault data = {};
	struct iommufd_device *idev;
	struct iopf_group *group;
	struct iopf_fault *iopf;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % fault_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while ((group = iommufd_fault_deliver_fetch(fault))) {
		if (done >= count ||
		    group->fault_count * fault_size > count - done) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		rc = xa_alloc(&fault->response, &group->cookie, group,
			      xa_limit_32b, GFP_KERNEL);
		if (rc) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		idev = to_iommufd_handle(group->attach_handle)->idev;
		list_for_each_entry(iopf, &group->faults, list) {
			iommufd_compose_fault_message(&iopf->fault,
						      &data, idev,
						      group->cookie);
			if (copy_to_user(buf + done, &data, fault_size)) {
				xa_erase(&fault->response, group->cookie);
				iommufd_fault_deliver_restore(fault, group);
				rc = -EFAULT;
				break;
			}
			done += fault_size;
		}
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}

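/*
 * write() on the fault fd accepts an array of struct
 * iommu_hwpt_page_response records. Each cookie must match a group
 * previously handed out by read(); the group is responded with the given
 * code and then freed.
 */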
static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
					size_t count, loff_t *ppos)
{
	size_t response_size = sizeof(struct iommu_hwpt_page_response);
	struct iommufd_fault *fault = filep->private_data;
	struct iommu_hwpt_page_response response;
	struct iopf_group *group;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % response_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while (count > done) {
		rc = copy_from_user(&response, buf + done, response_size);
		if (rc)
			break;

		static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
			      (int)IOMMU_PAGE_RESP_SUCCESS);
		static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
			      (int)IOMMU_PAGE_RESP_INVALID);
		if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
		    response.code != IOMMUFD_PAGE_RESP_INVALID) {
			rc = -EINVAL;
			break;
		}

		group = xa_erase(&fault->response, response.cookie);
		if (!group) {
			rc = -EINVAL;
			break;
		}

		iopf_group_response(group, response.code);
		iopf_free_group(group);
		done += response_size;
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}

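/*
 * The fault fd is always writable; it becomes readable whenever a fault
 * group is sitting on the deliver list.
 */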
static __poll_t iommufd_fault_fops_poll(struct file *filep,
					struct poll_table_struct *wait)
{
	struct iommufd_fault *fault = filep->private_data;
	__poll_t pollflags = EPOLLOUT;

	poll_wait(filep, &fault->wait_queue, wait);
	spin_lock(&fault->lock);
	if (!list_empty(&fault->deliver))
		pollflags |= EPOLLIN | EPOLLRDNORM;
	spin_unlock(&fault->lock);

	return pollflags;
}

static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
{
	struct iommufd_fault *fault = filep->private_data;

	refcount_dec(&fault->obj.users);
	iommufd_ctx_put(fault->ictx);
	return 0;
}

static const struct file_operations iommufd_fault_fops = {
	.owner		= THIS_MODULE,
	.open		= nonseekable_open,
	.read		= iommufd_fault_fops_read,
	.write		= iommufd_fault_fops_write,
	.poll		= iommufd_fault_fops_poll,
	.release	= iommufd_fault_fops_release,
};

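/*
 * Handle the fault allocation command (struct iommu_fault_alloc): create an
 * iommufd fault object plus an anonymous file descriptor that userspace
 * uses to read faults and write responses, returning both the object id and
 * the fd in the command payload.
 */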
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_fault_alloc *cmd = ucmd->cmd;
	struct iommufd_fault *fault;
	struct file *filep;
	int fdno;
	int rc;

	if (cmd->flags)
		return -EOPNOTSUPP;

	fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
	if (IS_ERR(fault))
		return PTR_ERR(fault);

	fault->ictx = ucmd->ictx;
	INIT_LIST_HEAD(&fault->deliver);
	xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
	mutex_init(&fault->mutex);
	spin_lock_init(&fault->lock);
	init_waitqueue_head(&fault->wait_queue);

	filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
				   fault, O_RDWR);
	if (IS_ERR(filep)) {
		rc = PTR_ERR(filep);
		goto out_abort;
	}

	refcount_inc(&fault->obj.users);
	iommufd_ctx_get(fault->ictx);
	fault->filep = filep;

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		rc = fdno;
		goto out_fput;
	}

	cmd->out_fault_id = fault->obj.id;
	cmd->out_fault_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;
	iommufd_object_finalize(ucmd->ictx, &fault->obj);

	fd_install(fdno, fault->filep);

	return 0;
out_put_fdno:
	put_unused_fd(fdno);
out_fput:
	fput(filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);

	return rc;
}

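/*
 * The iopf handler used for fault-capable hw pagetables: queue the group on
 * the fault object's deliver list and wake any reader polling the fault fd.
 */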
int iommufd_fault_iopf_handler(struct iopf_group *group)
{
	struct iommufd_hw_pagetable *hwpt;
	struct iommufd_fault *fault;

	hwpt = group->attach_handle->domain->fault_data;
	fault = hwpt->fault;

	spin_lock(&fault->lock);
	list_add_tail(&group->node, &fault->deliver);
	spin_unlock(&fault->lock);

	wake_up_interruptible(&fault->wait_queue);

	return 0;
}
463