xref: /linux/drivers/iommu/iommufd/iommufd_private.h (revision d261f9ebcf424535fe04e720a1cfa023be409f52)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3  */
4 #ifndef __IOMMUFD_PRIVATE_H
5 #define __IOMMUFD_PRIVATE_H
6 
7 #include <linux/rwsem.h>
8 #include <linux/xarray.h>
9 #include <linux/refcount.h>
10 #include <linux/uaccess.h>
11 #include <linux/iommu.h>
12 #include <linux/iova_bitmap.h>
13 #include <uapi/linux/iommufd.h>
14 #include "../iommu-priv.h"
15 
/*
 * Forward declarations for types that this header refers to by pointer
 * before (or without) seeing their full definition.
 */
struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;
20 
/*
 * Top-level iommufd context that the object helpers in this header operate
 * on.
 */
struct iommufd_ctx {
	struct file *file;
	/* NOTE(review): presumably indexed by iommufd_object::id - confirm */
	struct xarray objects;
	struct xarray groups;
	/*
	 * Woken by iommufd_put_object() when an object's shortterm_users
	 * count drops to zero.
	 */
	wait_queue_head_t destroy_wait;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};
32 
/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized), or where the IOAS is 1:N with multiple generic
 * domains.  The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the pfns mapped to the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	/* Outermost lock in the order documented above */
	struct rw_semaphore domains_rwsem;
	/* NOTE(review): presumably filled by iopt_table_add_domain() - confirm */
	struct xarray domains;
	/* NOTE(review): presumably filled by iopt_add_access() - confirm */
	struct xarray access_list;
	unsigned int next_domain_id;

	/* Taken after domains_rwsem and before pages::mutex */
	struct rw_semaphore iova_rwsem;
	/* Interval tree of iopt_areas (see the comment above the struct) */
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};
58 
/*
 * io_pagetable setup/teardown and page lookup. The definitions are not part
 * of this header, so the notes here are read off the names and signatures.
 *
 * NOTE(review): iopt_get_pages() presumably fills @pages_list for the range
 * starting at @iova of @length bytes, and iopt_free_pages_list() releases
 * such a list - confirm against the implementation.
 */
void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
/*
 * Bit flags. NOTE(review): presumably passed in the @flags argument of
 * iopt_map_user_pages() / iopt_map_pages() - confirm.
 */
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
/*
 * Mapping and unmapping entry points (defined outside this header).
 *
 * iopt_map_user_pages() takes a user pointer and a pointer to the IOVA;
 * iopt_map_pages() takes a pages list and a pointer to the destination IOVA.
 * Both also take an IOMMU protection value and a flags word.
 * The unmap calls take an @unmapped output pointer.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
77 
/*
 * Dirty tracking helpers operating on one @domain of an io_pagetable.
 * NOTE(review): exact semantics are defined by the implementation, which is
 * not visible from this header.
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);
84 
/*
 * Domain membership, allowed/reserved IOVA management and large page control
 * for an io_pagetable. All of these are defined outside this header.
 *
 * iopt_reserve_iova() takes an inclusive-looking [start, last] pair and an
 * opaque @owner; iopt_remove_reserved_iova() takes the same kind of @owner.
 * NOTE(review): presumably the latter drops everything reserved under that
 * owner - confirm.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
103 
/* State handed to each command handler that takes a struct iommufd_ucmd. */
struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	/* User buffer that iommufd_ucmd_respond() copies the response into */
	void __user *ubuffer;
	/* Upper bound on the bytes iommufd_ucmd_respond() writes to ubuffer */
	u32 user_size;
	/* Kernel-side command data; source of the iommufd_ucmd_respond() copy */
	void *cmd;
};
110 
/*
 * NOTE(review): presumably the entry point for VFIO compatibility ioctls,
 * taking the raw @cmd number and @arg - confirm against the definition.
 */
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);
113 
114 /* Copy the response in ucmd->cmd back to userspace. */
115 static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
116 				       size_t cmd_len)
117 {
118 	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
119 			 min_t(size_t, ucmd->user_size, cmd_len)))
120 		return -EFAULT;
121 	return 0;
122 }
123 
/* Type tag stored in iommufd_object::type. */
enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	/* Alias of IOMMUFD_OBJ_NONE (same numeric value) */
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
	IOMMUFD_OBJ_FAULT,
	/* Only present when the selftest support is built in */
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
	/* Always last: one past the highest valid type value */
	IOMMUFD_OBJ_MAX,
};
138 
/*
 * Base struct for all objects with a userspace ID handle.
 *
 * Structs allocated through iommufd_object_alloc() must embed this as a
 * member at offset 0; the allocation macro enforces that at build time.
 */
struct iommufd_object {
	/* Taken/dropped together with users by iommufd_lock_obj()/put_object() */
	refcount_t shortterm_users;
	refcount_t users;
	enum iommufd_object_type type;
	unsigned int id;
};
146 
147 static inline bool iommufd_lock_obj(struct iommufd_object *obj)
148 {
149 	if (!refcount_inc_not_zero(&obj->users))
150 		return false;
151 	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
152 		/*
153 		 * If the caller doesn't already have a ref on obj this must be
154 		 * called under the xa_lock. Otherwise the caller is holding a
155 		 * ref on users. Thus it cannot be one before this decrement.
156 		 */
157 		refcount_dec(&obj->users);
158 		return false;
159 	}
160 	return true;
161 }
162 
/*
 * Look up the object with @id in @ictx, constrained by @type. The typed
 * iommufd_get_*() inline helpers in this header wrap the result with
 * container_of().
 * NOTE(review): presumably the returned object must be released with
 * iommufd_put_object() - confirm against the definition.
 */
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
165 static inline void iommufd_put_object(struct iommufd_ctx *ictx,
166 				      struct iommufd_object *obj)
167 {
168 	/*
169 	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
170 	 * a spurious !0 users with a 0 shortterm_users.
171 	 */
172 	refcount_dec(&obj->users);
173 	if (refcount_dec_and_test(&obj->shortterm_users))
174 		wake_up_interruptible_all(&ictx->destroy_wait);
175 }
176 
/*
 * Completion paths for an object under construction (defined outside this
 * header). NOTE(review): presumably finalize publishes the object while the
 * abort variants discard it, the second one also running the type's destroy
 * callback - confirm against the definitions.
 */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);
182 
/*
 * Flags for iommufd_object_remove(). In this header,
 * iommufd_object_destroy_user() passes REMOVE_WAIT_SHORTTERM and
 * iommufd_object_put_and_try_destroy() passes 0.
 */
enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
/*
 * Remove an object from @ictx. Both in-header callers pass the object
 * pointer as @to_destroy together with its own id.
 * Returns an int status; iommufd_object_destroy_user() treats non-zero as
 * unexpected.
 */
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);
189 
190 /*
191  * The caller holds a users refcount and wants to destroy the object. At this
192  * point the caller has no shortterm_users reference and at least the xarray
193  * will be holding one.
194  */
195 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
196 					       struct iommufd_object *obj)
197 {
198 	int ret;
199 
200 	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
201 
202 	/*
203 	 * If there is a bug and we couldn't destroy the object then we did put
204 	 * back the caller's users refcount and will eventually try to free it
205 	 * again during close.
206 	 */
207 	WARN_ON(ret);
208 }
209 
210 /*
211  * The HWPT allocated by autodomains is used in possibly many devices and
212  * is automatically destroyed when its refcount reaches zero.
213  *
214  * If userspace uses the HWPT manually, even for a short term, then it will
215  * disrupt this refcounting and the auto-free in the kernel will not work.
216  * Userspace that tries to use the automatically allocated HWPT must be careful
217  * to ensure that it is consistently destroyed, eg by not racing accesses
218  * and by not attaching an automatic HWPT to a device manually.
219  */
220 static inline void
221 iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
222 				   struct iommufd_object *obj)
223 {
224 	iommufd_object_remove(ictx, obj, obj->id, 0);
225 }
226 
/*
 * Low level allocator taking the total @size of the embedding struct and the
 * object @type. Use the iommufd_object_alloc() wrapper instead.
 */
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);

/*
 * Allocate sizeof(*(ptr)) bytes through _iommufd_object_alloc() and convert
 * the returned iommufd_object pointer to typeof(*(ptr)) via the member named
 * @obj. BUILD_BUG_ON_ZERO() breaks the build unless that member sits at
 * offset 0 of the embedding struct. @ptr is only used for its type.
 */
#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

/* Common case: the embedded iommufd_object member is literally called obj. */
#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)
242 
/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	/* Must not be held when calling iommufd_hw_pagetable_put() */
	struct mutex mutex;
	/* List head; entries are linked via iommufd_hwpt_paging::hwpt_item */
	struct list_head hwpt_list;
};
263 
264 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
265 						    u32 id)
266 {
267 	return container_of(iommufd_get_object(ictx, id,
268 					       IOMMUFD_OBJ_IOAS),
269 			    struct iommufd_ioas, obj);
270 }
271 
/*
 * IOAS allocation, destruction and command handlers, all defined outside
 * this header. The handlers taking a struct iommufd_ucmd return an int
 * status.
 */
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
/*
 * NOTE(review): presumably validates the IOVA range described by @bitmap
 * against @iopt - confirm against the definition.
 */
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
287 
/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 *
 * This is the part shared by iommufd_hwpt_paging and iommufd_hwpt_nested,
 * which both embed it as their first member ("common").
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
	/*
	 * When non-NULL the attach/detach/replace helpers in this header use
	 * the iommufd_fault_domain_*() variants.
	 */
	struct iommufd_fault *fault;
};
299 
/* HW pagetable of type IOMMUFD_OBJ_HWPT_PAGING, tied to an IOAS. */
struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	/*
	 * When set, iommufd_hw_pagetable_put() uses
	 * iommufd_object_put_and_try_destroy() instead of a plain refcount_dec().
	 */
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};
310 
/* HW pagetable of type IOMMUFD_OBJ_HWPT_NESTED, pointing at a paging parent. */
struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
};
315 
316 static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
317 {
318 	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
319 }
320 
321 static inline struct iommufd_hwpt_paging *
322 to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
323 {
324 	return container_of(hwpt, struct iommufd_hwpt_paging, common);
325 }
326 
327 static inline struct iommufd_hwpt_paging *
328 iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
329 {
330 	return container_of(iommufd_get_object(ucmd->ictx, id,
331 					       IOMMUFD_OBJ_HWPT_PAGING),
332 			    struct iommufd_hwpt_paging, common.obj);
333 }
334 
335 static inline struct iommufd_hw_pagetable *
336 iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
337 {
338 	return container_of(iommufd_get_object(ucmd->ictx, id,
339 					       IOMMUFD_OBJ_HWPT_NESTED),
340 			    struct iommufd_hw_pagetable, obj);
341 }
342 
/* Dirty tracking command handlers for HW pagetables (defined elsewhere). */
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
345 
/*
 * HW pagetable allocation, attach/detach, teardown and command handlers.
 * All are defined outside this header.
 *
 * iommufd_hw_pagetable_detach() returns a HW pagetable pointer.
 * NOTE(review): presumably the one the device was attached to - confirm.
 */
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
361 
362 static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
363 					    struct iommufd_hw_pagetable *hwpt)
364 {
365 	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
366 		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
367 
368 		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
369 
370 		if (hwpt_paging->auto_domain) {
371 			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
372 			return;
373 		}
374 	}
375 	refcount_dec(&hwpt->obj.users);
376 }
377 
/* iommufd's per-iommu_group state, reference counted through @ref. */
struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	/* Passed to the iommu_*_group() calls made by the helpers in this header */
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	/* NOTE(review): presumably links iommufd_device::group_item - confirm */
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};
387 
/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	/* Group state; igroup->group is what the attach helpers operate on */
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};
405 
406 static inline struct iommufd_device *
407 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
408 {
409 	return container_of(iommufd_get_object(ucmd->ictx, id,
410 					       IOMMUFD_OBJ_DEVICE),
411 			    struct iommufd_device, obj);
412 }
413 
/* Device object teardown and the hw-info command handler (defined elsewhere). */
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
416 
/* Object of type IOMMUFD_OBJ_ACCESS; see iopt_add_access(). */
struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	/* NOTE(review): presumably protects ioas / ioas_unpin - confirm */
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	/* Same name and type as the id argument of iopt_remove_access() */
	u32 iopt_access_list_id;
};
428 
/*
 * Register/unregister an access with an io_pagetable, plus the object
 * destroy callback for accesses. Defined outside this header.
 */
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
434 
/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to the user space. These objects are created/destroyed by the user space and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct file *filep;

	/* The lists of outstanding faults protected by below mutex. */
	struct mutex mutex;
	struct list_head deliver;
	struct xarray response;

	struct wait_queue_head wait_queue;
};
452 
/*
 * iommufd's wrapper around the core iommu_attach_handle, adding the
 * iommufd_device the handle belongs to.
 */
struct iommufd_attach_handle {
	struct iommu_attach_handle handle;
	struct iommufd_device *idev;
};

/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)
460 
461 static inline struct iommufd_fault *
462 iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
463 {
464 	return container_of(iommufd_get_object(ucmd->ictx, id,
465 					       IOMMUFD_OBJ_FAULT),
466 			    struct iommufd_fault, obj);
467 }
468 
/* Fault object allocation/teardown and the iopf handler (defined elsewhere). */
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);

/*
 * Attach/detach/replace variants that the iommufd_hwpt_*_device() helpers
 * in this header call when a HW pagetable has a fault object.
 */
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);
480 
481 static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
482 					     struct iommufd_device *idev)
483 {
484 	if (hwpt->fault)
485 		return iommufd_fault_domain_attach_dev(hwpt, idev);
486 
487 	return iommu_attach_group(hwpt->domain, idev->igroup->group);
488 }
489 
490 static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
491 					      struct iommufd_device *idev)
492 {
493 	if (hwpt->fault)
494 		iommufd_fault_domain_detach_dev(hwpt, idev);
495 
496 	iommu_detach_group(hwpt->domain, idev->igroup->group);
497 }
498 
499 static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
500 					      struct iommufd_hw_pagetable *hwpt,
501 					      struct iommufd_hw_pagetable *old)
502 {
503 	if (old->fault || hwpt->fault)
504 		return iommufd_fault_domain_replace_dev(idev, hwpt, old);
505 
506 	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
507 }
508 
/*
 * Selftest hooks. With CONFIG_IOMMUFD_TEST the real implementations are
 * declared; without it the hooks that have a stub below collapse to inline
 * no-ops. iommufd_test(), iommufd_selftest_destroy() and
 * iommufd_test_memory_limit have no stub and exist only in test builds.
 */
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
/* Stub: leaves *iova and *flags untouched. */
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
/* Stub: never requests a failure. */
static inline bool iommufd_should_fail(void)
{
	return false;
}
/* Stub: nothing to initialise, always succeeds. */
static inline int __init iommufd_test_init(void)
{
	return 0;
}
/* Stub: nothing to tear down. */
static inline void iommufd_test_exit(void)
{
}
/* Stub: no device is a mock device when the selftest is not built. */
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
541 #endif
542