xref: /linux/drivers/iommu/iommufd/iommufd_private.h (revision 7f71507851fc7764b36a3221839607d3a45c2025)
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H

#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"

struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;

struct iommufd_ctx {
	struct file *file;
	struct xarray objects;
	struct xarray groups;
	wait_queue_head_t destroy_wait;
	struct rw_semaphore ioas_creation_lock;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};

/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOASs are 1:1 with a domain (eg because the
 * domain is HW customized) and one where the IOAS is 1:N with multiple generic
 * domains. The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the PFNs mapped into the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	struct rw_semaphore iova_rwsem;
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};

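/*
 * Illustrative sketch (not part of the original header): a hypothetical
 * caller holding a struct io_pagetable *iopt that needs both the domain list
 * and the area tree must follow the locking order documented above, taking
 * domains_rwsem before iova_rwsem:
 *
 *	down_read(&iopt->domains_rwsem);
 *	down_read(&iopt->iova_rwsem);
 *	... inspect iopt->domains and iopt->area_itree ...
 *	up_read(&iopt->iova_rwsem);
 *	up_read(&iopt->domains_rwsem);
 */
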
void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);

void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);

struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	void __user *ubuffer;
	u32 user_size;
	void *cmd;
};

int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);

/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
				       size_t cmd_len)
{
	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
			 min_t(size_t, ucmd->user_size, cmd_len)))
		return -EFAULT;
	return 0;
}

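/*
 * Illustrative sketch (not part of the original header) of how an ioctl
 * handler uses iommufd_ucmd_respond(): the ioctl dispatcher has already
 * copied the userspace payload into ucmd->cmd, so a handler fills in the
 * output fields in place and copies the result back. The handler and command
 * struct names below are hypothetical:
 *
 *	int iommufd_example_ioctl(struct iommufd_ucmd *ucmd)
 *	{
 *		struct iommu_example_cmd *cmd = ucmd->cmd;
 *
 *		cmd->out_value = ...;
 *		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 *	}
 */
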
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
	if (!refcount_inc_not_zero(&obj->users))
		return false;
	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
		/*
		 * If the caller doesn't already have a ref on obj this must be
		 * called under the xa_lock. Otherwise the caller is holding a
		 * ref on users, so users cannot be 1 before this decrement.
		 */
		refcount_dec(&obj->users);
		return false;
	}
	return true;
}

struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	/*
	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
	 * a spurious !0 users with a 0 shortterm_users.
	 */
	refcount_dec(&obj->users);
	if (refcount_dec_and_test(&obj->shortterm_users))
		wake_up_interruptible_all(&ictx->destroy_wait);
}

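/*
 * Illustrative sketch (not part of the original header): iommufd_get_object()
 * takes both the users and shortterm_users refcounts via iommufd_lock_obj()
 * and must be paired with iommufd_put_object() on the same ictx:
 *
 *	struct iommufd_object *obj;
 *
 *	obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	... the object cannot be destroyed while the refs are held ...
 *	iommufd_put_object(ictx, obj);
 */
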
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);

/*
 * The caller holds a users refcount and wants to destroy the object. At this
 * point the caller has no shortterm_users reference and at least the xarray
 * will be holding one.
 */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
					       struct iommufd_object *obj)
{
	int ret;

	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);

	/*
	 * If there is a bug and we couldn't destroy the object then we did put
	 * back the caller's users refcount and will eventually try to free it
	 * again during close.
	 */
	WARN_ON(ret);
}

/*
 * The HWPT allocated by autodomains is used in possibly many devices and
 * is automatically destroyed when its refcount reaches zero.
 *
 * If userspace uses the HWPT manually, even for a short term, then it will
 * disrupt this refcounting and the auto-free in the kernel will not work.
 * Userspace that tries to use the automatically allocated HWPT must be careful
 * to ensure that it is consistently destroyed, eg by not racing accesses
 * and by not attaching an automatic HWPT to a device manually.
 */
static inline void
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
				   struct iommufd_object *obj)
{
	iommufd_object_remove(ictx, obj, obj->id, 0);
}

#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)

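/*
 * Illustrative sketch (not part of the original header): the allocation
 * helpers expect the wrapper structure to embed struct iommufd_object as its
 * first member, named "obj" for the short form of the macro. The object type
 * and structure below are hypothetical:
 *
 *	struct iommufd_example {
 *		struct iommufd_object obj;	(must be the first member)
 *		u32 some_state;
 *	};
 *
 *	struct iommufd_example *ex;
 *
 *	ex = iommufd_object_alloc(ictx, ex, IOMMUFD_OBJ_EXAMPLE);
 *	if (IS_ERR(ex))
 *		return PTR_ERR(ex);
 *	... initialize, then publish the ID with iommufd_object_finalize(),
 *	    or undo with iommufd_object_abort_and_destroy() on error ...
 */
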
/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and a wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	struct mutex mutex;
	struct list_head hwpt_list;
};

static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
						    u32 id)
{
	return container_of(iommufd_get_object(ictx, id,
					       IOMMUFD_OBJ_IOAS),
			    struct iommufd_ioas, obj);
}

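/*
 * Illustrative sketch (not part of the original header): the typed getters
 * such as iommufd_get_ioas() wrap iommufd_get_object() and are paired with
 * iommufd_put_object() on the embedded obj:
 *
 *	struct iommufd_ioas *ioas;
 *
 *	ioas = iommufd_get_ioas(ucmd->ictx, id);
 *	if (IS_ERR(ioas))
 *		return PTR_ERR(ioas);
 *	... operate on ioas->iopt, taking ioas->mutex where hwpt_list is used ...
 *	iommufd_put_object(ucmd->ictx, &ioas->obj);
 */
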
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);

/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
	struct iommufd_fault *fault;
};

struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};

struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
	struct iommufd_viommu *viommu;
};

static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{
	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
}

static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_paging, common);
}

static inline struct iommufd_hwpt_nested *
to_hwpt_nested(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_nested, common);
}

static inline struct iommufd_hwpt_paging *
find_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	switch (hwpt->obj.type) {
	case IOMMUFD_OBJ_HWPT_PAGING:
		return to_hwpt_paging(hwpt);
	case IOMMUFD_OBJ_HWPT_NESTED:
		return to_hwpt_nested(hwpt)->parent;
	default:
		return NULL;
	}
}

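/*
 * Illustrative sketch (not part of the original header): code that only needs
 * the backing paging domain can use find_hwpt_paging() to cover both the
 * HWPT_PAGING and HWPT_NESTED cases and bail out for any other object type.
 * The error code chosen below is hypothetical:
 *
 *	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
 *
 *	if (!hwpt_paging)
 *		return -EOPNOTSUPP;
 *	... use hwpt_paging->ioas ...
 */
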
static inline struct iommufd_hwpt_paging *
iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_PAGING),
			    struct iommufd_hwpt_paging, common.obj);
}

static inline struct iommufd_hw_pagetable *
iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_NESTED),
			    struct iommufd_hw_pagetable, obj);
}

int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);

static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
					    struct iommufd_hw_pagetable *hwpt)
{
	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);

		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);

		if (hwpt_paging->auto_domain) {
			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
			return;
		}
	}
	refcount_dec(&hwpt->obj.users);
}

struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};

/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};

static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_DEVICE),
			    struct iommufd_device, obj);
}

void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);

struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	u32 iopt_access_list_id;
};

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);

/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to userspace. These objects are created/destroyed by userspace and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct file *filep;

	/* The lists of outstanding faults, protected by the mutex below */
	struct mutex mutex;
	struct list_head deliver;
	struct xarray response;

	struct wait_queue_head wait_queue;
};

struct iommufd_attach_handle {
	struct iommu_attach_handle handle;
	struct iommufd_device *idev;
};

/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)

static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_FAULT),
			    struct iommufd_fault, obj);
}

int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);

int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);

static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (hwpt->fault)
		return iommufd_fault_domain_attach_dev(hwpt, idev);

	return iommu_attach_group(hwpt->domain, idev->igroup->group);
}

static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}

static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (old->fault || hwpt->fault)
		return iommufd_fault_domain_replace_dev(idev, hwpt, old);

	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}

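/*
 * Illustrative sketch (not part of the original header): attach paths go
 * through the wrappers above rather than calling iommu_attach_group()
 * directly, so that fault-capable HWPTs are routed through the iopf code:
 *
 *	rc = iommufd_hwpt_attach_device(hwpt, idev);
 *	if (rc)
 *		return rc;
 *	...
 *	iommufd_hwpt_detach_device(hwpt, idev);
 */
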
static inline struct iommufd_viommu *
iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_VIOMMU),
			    struct iommufd_viommu, obj);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);

struct iommufd_vdevice {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_viommu *viommu;
	struct device *dev;
	u64 id; /* per-vIOMMU virtual ID */
};

#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
#endif