xref: /linux/drivers/iommu/iommufd/iommufd_private.h (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3  */
4 #ifndef __IOMMUFD_PRIVATE_H
5 #define __IOMMUFD_PRIVATE_H
6 
7 #include <linux/iommu.h>
8 #include <linux/iova_bitmap.h>
9 #include <linux/refcount.h>
10 #include <linux/rwsem.h>
11 #include <linux/uaccess.h>
12 #include <linux/xarray.h>
13 #include <uapi/linux/iommufd.h>
14 
15 #include "../iommu-priv.h"
16 
17 struct iommu_domain;
18 struct iommu_group;
19 struct iommu_option;
20 struct iommufd_device;
21 
22 struct iommufd_ctx {
23 	struct file *file;
24 	struct xarray objects;
25 	struct xarray groups;
26 	wait_queue_head_t destroy_wait;
27 
28 	u8 account_mode;
29 	/* Compatibility with VFIO no iommu */
30 	u8 no_iommu_mode;
31 	struct iommufd_ioas *vfio_ioas;
32 };
33 
34 /*
35  * The IOVA to PFN map. The map automatically copies the PFNs into multiple
36  * domains and permits sharing of PFNs between io_pagetable instances. This
37  * supports both a design where IOAS's are 1:1 with a domain (eg because the
38  * domain is HW customized), or where the IOAS is 1:N with multiple generic
39  * domains.  The io_pagetable holds an interval tree of iopt_areas which point
40  * to shared iopt_pages which hold the pfns mapped to the page table.
41  *
42  * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
43  */
44 struct io_pagetable {
45 	struct rw_semaphore domains_rwsem;
46 	struct xarray domains;
47 	struct xarray access_list;
48 	unsigned int next_domain_id;
49 
50 	struct rw_semaphore iova_rwsem;
51 	struct rb_root_cached area_itree;
52 	/* IOVA that cannot become reserved, struct iopt_allowed */
53 	struct rb_root_cached allowed_itree;
54 	/* IOVA that cannot be allocated, struct iopt_reserved */
55 	struct rb_root_cached reserved_itree;
56 	u8 disable_large_pages;
57 	unsigned long iova_alignment;
58 };
59 
60 void iopt_init_table(struct io_pagetable *iopt);
61 void iopt_destroy_table(struct io_pagetable *iopt);
62 int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
63 		   unsigned long length, struct list_head *pages_list);
64 void iopt_free_pages_list(struct list_head *pages_list);
65 enum {
66 	IOPT_ALLOC_IOVA = 1 << 0,
67 };
68 int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
69 			unsigned long *iova, void __user *uptr,
70 			unsigned long length, int iommu_prot,
71 			unsigned int flags);
72 int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
73 		   unsigned long length, unsigned long *dst_iova,
74 		   int iommu_prot, unsigned int flags);
75 int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
76 		    unsigned long length, unsigned long *unmapped);
77 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
78 
79 int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
80 				   struct iommu_domain *domain,
81 				   unsigned long flags,
82 				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
83 int iopt_set_dirty_tracking(struct io_pagetable *iopt,
84 			    struct iommu_domain *domain, bool enable);
85 
86 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
87 				 unsigned long length);
88 int iopt_table_add_domain(struct io_pagetable *iopt,
89 			  struct iommu_domain *domain);
90 void iopt_table_remove_domain(struct io_pagetable *iopt,
91 			      struct iommu_domain *domain);
92 int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
93 					struct device *dev,
94 					phys_addr_t *sw_msi_start);
95 int iopt_set_allow_iova(struct io_pagetable *iopt,
96 			struct rb_root_cached *allowed_iova);
97 int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
98 		      unsigned long last, void *owner);
99 void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
100 int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
101 		  size_t num_iovas);
102 void iopt_enable_large_pages(struct io_pagetable *iopt);
103 int iopt_disable_large_pages(struct io_pagetable *iopt);
104 
105 struct iommufd_ucmd {
106 	struct iommufd_ctx *ictx;
107 	void __user *ubuffer;
108 	u32 user_size;
109 	void *cmd;
110 };
111 
112 int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
113 		       unsigned long arg);
114 
115 /* Copy the response in ucmd->cmd back to userspace. */
116 static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
117 				       size_t cmd_len)
118 {
119 	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
120 			 min_t(size_t, ucmd->user_size, cmd_len)))
121 		return -EFAULT;
122 	return 0;
123 }
124 
/*
 * Type tag stored in iommufd_object::type.
 *
 * IOMMUFD_OBJ_ANY shares the value of IOMMUFD_OBJ_NONE. IOMMUFD_OBJ_MAX is
 * the last enumerator, so it is one greater than the highest real type and
 * changes with CONFIG_IOMMUFD_TEST.
 */
enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
	IOMMUFD_OBJ_FAULT,
#ifdef CONFIG_IOMMUFD_TEST
	/* Only present in selftest builds */
	IOMMUFD_OBJ_SELFTEST,
#endif
	IOMMUFD_OBJ_MAX,
};
139 
140 /* Base struct for all objects with a userspace ID handle. */
141 struct iommufd_object {
142 	refcount_t shortterm_users;
143 	refcount_t users;
144 	enum iommufd_object_type type;
145 	unsigned int id;
146 };
147 
148 static inline bool iommufd_lock_obj(struct iommufd_object *obj)
149 {
150 	if (!refcount_inc_not_zero(&obj->users))
151 		return false;
152 	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
153 		/*
154 		 * If the caller doesn't already have a ref on obj this must be
155 		 * called under the xa_lock. Otherwise the caller is holding a
156 		 * ref on users. Thus it cannot be one before this decrement.
157 		 */
158 		refcount_dec(&obj->users);
159 		return false;
160 	}
161 	return true;
162 }
163 
164 struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
165 					  enum iommufd_object_type type);
166 static inline void iommufd_put_object(struct iommufd_ctx *ictx,
167 				      struct iommufd_object *obj)
168 {
169 	/*
170 	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
171 	 * a spurious !0 users with a 0 shortterm_users.
172 	 */
173 	refcount_dec(&obj->users);
174 	if (refcount_dec_and_test(&obj->shortterm_users))
175 		wake_up_interruptible_all(&ictx->destroy_wait);
176 }
177 
178 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
179 void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
180 				      struct iommufd_object *obj);
181 void iommufd_object_finalize(struct iommufd_ctx *ictx,
182 			     struct iommufd_object *obj);
183 
184 enum {
185 	REMOVE_WAIT_SHORTTERM = 1,
186 };
187 int iommufd_object_remove(struct iommufd_ctx *ictx,
188 			  struct iommufd_object *to_destroy, u32 id,
189 			  unsigned int flags);
190 
191 /*
192  * The caller holds a users refcount and wants to destroy the object. At this
193  * point the caller has no shortterm_users reference and at least the xarray
194  * will be holding one.
195  */
196 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
197 					       struct iommufd_object *obj)
198 {
199 	int ret;
200 
201 	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
202 
203 	/*
204 	 * If there is a bug and we couldn't destroy the object then we did put
205 	 * back the caller's users refcount and will eventually try to free it
206 	 * again during close.
207 	 */
208 	WARN_ON(ret);
209 }
210 
211 /*
212  * The HWPT allocated by autodomains is used in possibly many devices and
213  * is automatically destroyed when its refcount reaches zero.
214  *
215  * If userspace uses the HWPT manually, even for a short term, then it will
216  * disrupt this refcounting and the auto-free in the kernel will not work.
217  * Userspace that tries to use the automatically allocated HWPT must be careful
218  * to ensure that it is consistently destroyed, eg by not racing accesses
219  * and by not attaching an automatic HWPT to a device manually.
220  */
221 static inline void
222 iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
223 				   struct iommufd_object *obj)
224 {
225 	iommufd_object_remove(ictx, obj, obj->id, 0);
226 }
227 
228 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
229 					     size_t size,
230 					     enum iommufd_object_type type);
231 
232 #define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
233 	container_of(_iommufd_object_alloc(                                    \
234 			     ictx,                                             \
235 			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
236 						      offsetof(typeof(*(ptr)), \
237 							       obj) != 0),     \
238 			     type),                                            \
239 		     typeof(*(ptr)), obj)
240 
241 #define iommufd_object_alloc(ictx, ptr, type) \
242 	__iommufd_object_alloc(ictx, ptr, type, obj)
243 
244 /*
245  * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
246  * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
247  * mapping is copied into all of the associated domains and made available to
248  * in-kernel users.
249  *
250  * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
251  * object. When we go to attach a device to an IOAS we need to get an
252  * iommu_domain and wrapping iommufd_hw_pagetable for it.
253  *
254  * An iommu_domain & iommfd_hw_pagetable will be automatically selected
255  * for a device based on the hwpt_list. If no suitable iommu_domain
256  * is found a new iommu_domain will be created.
257  */
258 struct iommufd_ioas {
259 	struct iommufd_object obj;
260 	struct io_pagetable iopt;
261 	struct mutex mutex;
262 	struct list_head hwpt_list;
263 };
264 
265 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
266 						    u32 id)
267 {
268 	return container_of(iommufd_get_object(ictx, id,
269 					       IOMMUFD_OBJ_IOAS),
270 			    struct iommufd_ioas, obj);
271 }
272 
/* IOAS object allocation and destruction */
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);

/* IOAS operations taking a user command */
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
288 
289 /*
290  * A HW pagetable is called an iommu_domain inside the kernel. This user object
291  * allows directly creating and inspecting the domains. Domains that have kernel
292  * owned page tables will be associated with an iommufd_ioas that provides the
293  * IOVA to PFN map.
294  */
295 struct iommufd_hw_pagetable {
296 	struct iommufd_object obj;
297 	struct iommu_domain *domain;
298 	struct iommufd_fault *fault;
299 };
300 
301 struct iommufd_hwpt_paging {
302 	struct iommufd_hw_pagetable common;
303 	struct iommufd_ioas *ioas;
304 	bool auto_domain : 1;
305 	bool enforce_cache_coherency : 1;
306 	bool msi_cookie : 1;
307 	bool nest_parent : 1;
308 	/* Head at iommufd_ioas::hwpt_list */
309 	struct list_head hwpt_item;
310 };
311 
312 struct iommufd_hwpt_nested {
313 	struct iommufd_hw_pagetable common;
314 	struct iommufd_hwpt_paging *parent;
315 };
316 
317 static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
318 {
319 	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
320 }
321 
322 static inline struct iommufd_hwpt_paging *
323 to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
324 {
325 	return container_of(hwpt, struct iommufd_hwpt_paging, common);
326 }
327 
328 static inline struct iommufd_hwpt_nested *
329 to_hwpt_nested(struct iommufd_hw_pagetable *hwpt)
330 {
331 	return container_of(hwpt, struct iommufd_hwpt_nested, common);
332 }
333 
334 static inline struct iommufd_hwpt_paging *
335 find_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
336 {
337 	switch (hwpt->obj.type) {
338 	case IOMMUFD_OBJ_HWPT_PAGING:
339 		return to_hwpt_paging(hwpt);
340 	case IOMMUFD_OBJ_HWPT_NESTED:
341 		return to_hwpt_nested(hwpt)->parent;
342 	default:
343 		return NULL;
344 	}
345 }
346 
347 static inline struct iommufd_hwpt_paging *
348 iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
349 {
350 	return container_of(iommufd_get_object(ucmd->ictx, id,
351 					       IOMMUFD_OBJ_HWPT_PAGING),
352 			    struct iommufd_hwpt_paging, common.obj);
353 }
354 
355 static inline struct iommufd_hw_pagetable *
356 iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
357 {
358 	return container_of(iommufd_get_object(ucmd->ictx, id,
359 					       IOMMUFD_OBJ_HWPT_NESTED),
360 			    struct iommufd_hw_pagetable, obj);
361 }
362 
363 int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
364 int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
365 
366 struct iommufd_hwpt_paging *
367 iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
368 			  struct iommufd_device *idev, u32 flags,
369 			  bool immediate_attach,
370 			  const struct iommu_user_data *user_data);
371 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
372 				struct iommufd_device *idev);
373 struct iommufd_hw_pagetable *
374 iommufd_hw_pagetable_detach(struct iommufd_device *idev);
375 void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
376 void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
377 void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
378 void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
379 int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
380 int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
381 
382 static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
383 					    struct iommufd_hw_pagetable *hwpt)
384 {
385 	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
386 		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
387 
388 		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
389 
390 		if (hwpt_paging->auto_domain) {
391 			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
392 			return;
393 		}
394 	}
395 	refcount_dec(&hwpt->obj.users);
396 }
397 
398 struct iommufd_group {
399 	struct kref ref;
400 	struct mutex lock;
401 	struct iommufd_ctx *ictx;
402 	struct iommu_group *group;
403 	struct iommufd_hw_pagetable *hwpt;
404 	struct list_head device_list;
405 	phys_addr_t sw_msi_start;
406 };
407 
408 /*
409  * A iommufd_device object represents the binding relationship between a
410  * consuming driver and the iommufd. These objects are created/destroyed by
411  * external drivers, not by userspace.
412  */
413 struct iommufd_device {
414 	struct iommufd_object obj;
415 	struct iommufd_ctx *ictx;
416 	struct iommufd_group *igroup;
417 	struct list_head group_item;
418 	/* always the physical device */
419 	struct device *dev;
420 	bool enforce_cache_coherency;
421 	/* protect iopf_enabled counter */
422 	struct mutex iopf_lock;
423 	unsigned int iopf_enabled;
424 };
425 
426 static inline struct iommufd_device *
427 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
428 {
429 	return container_of(iommufd_get_object(ucmd->ictx, id,
430 					       IOMMUFD_OBJ_DEVICE),
431 			    struct iommufd_device, obj);
432 }
433 
434 void iommufd_device_destroy(struct iommufd_object *obj);
435 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
436 
437 struct iommufd_access {
438 	struct iommufd_object obj;
439 	struct iommufd_ctx *ictx;
440 	struct iommufd_ioas *ioas;
441 	struct iommufd_ioas *ioas_unpin;
442 	struct mutex ioas_lock;
443 	const struct iommufd_access_ops *ops;
444 	void *data;
445 	unsigned long iova_alignment;
446 	u32 iopt_access_list_id;
447 };
448 
449 int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
450 void iopt_remove_access(struct io_pagetable *iopt,
451 			struct iommufd_access *access,
452 			u32 iopt_access_list_id);
453 void iommufd_access_destroy_object(struct iommufd_object *obj);
454 
455 /*
456  * An iommufd_fault object represents an interface to deliver I/O page faults
457  * to the user space. These objects are created/destroyed by the user space and
458  * associated with hardware page table objects during page-table allocation.
459  */
460 struct iommufd_fault {
461 	struct iommufd_object obj;
462 	struct iommufd_ctx *ictx;
463 	struct file *filep;
464 
465 	/* The lists of outstanding faults protected by below mutex. */
466 	struct mutex mutex;
467 	struct list_head deliver;
468 	struct xarray response;
469 
470 	struct wait_queue_head wait_queue;
471 };
472 
473 struct iommufd_attach_handle {
474 	struct iommu_attach_handle handle;
475 	struct iommufd_device *idev;
476 };
477 
478 /* Convert an iommu attach handle to iommufd handle. */
479 #define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)
480 
481 static inline struct iommufd_fault *
482 iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
483 {
484 	return container_of(iommufd_get_object(ucmd->ictx, id,
485 					       IOMMUFD_OBJ_FAULT),
486 			    struct iommufd_fault, obj);
487 }
488 
/* Fault object allocation, destruction and the IOPF handler */
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);

/* Attach/detach/replace variants used when a hwpt has a fault object */
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);
500 
501 static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
502 					     struct iommufd_device *idev)
503 {
504 	if (hwpt->fault)
505 		return iommufd_fault_domain_attach_dev(hwpt, idev);
506 
507 	return iommu_attach_group(hwpt->domain, idev->igroup->group);
508 }
509 
510 static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
511 					      struct iommufd_device *idev)
512 {
513 	if (hwpt->fault) {
514 		iommufd_fault_domain_detach_dev(hwpt, idev);
515 		return;
516 	}
517 
518 	iommu_detach_group(hwpt->domain, idev->igroup->group);
519 }
520 
521 static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
522 					      struct iommufd_hw_pagetable *hwpt,
523 					      struct iommufd_hw_pagetable *old)
524 {
525 	if (old->fault || hwpt->fault)
526 		return iommufd_fault_domain_replace_dev(idev, hwpt, old);
527 
528 	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
529 }
530 
531 #ifdef CONFIG_IOMMUFD_TEST
532 int iommufd_test(struct iommufd_ucmd *ucmd);
533 void iommufd_selftest_destroy(struct iommufd_object *obj);
534 extern size_t iommufd_test_memory_limit;
535 void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
536 				   unsigned int ioas_id, u64 *iova, u32 *flags);
537 bool iommufd_should_fail(void);
538 int __init iommufd_test_init(void);
539 void iommufd_test_exit(void);
540 bool iommufd_selftest_is_mock_dev(struct device *dev);
541 #else
542 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
543 						 unsigned int ioas_id,
544 						 u64 *iova, u32 *flags)
545 {
546 }
547 static inline bool iommufd_should_fail(void)
548 {
549 	return false;
550 }
551 static inline int __init iommufd_test_init(void)
552 {
553 	return 0;
554 }
555 static inline void iommufd_test_exit(void)
556 {
557 }
558 static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
559 {
560 	return false;
561 }
562 #endif
563 #endif
564