xref: /linux/drivers/iommu/iommufd/iommufd_private.h (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3  */
4 #ifndef __IOMMUFD_PRIVATE_H
5 #define __IOMMUFD_PRIVATE_H
6 
7 #include <linux/rwsem.h>
8 #include <linux/xarray.h>
9 #include <linux/refcount.h>
10 #include <linux/uaccess.h>
11 #include <linux/iommu.h>
12 #include <linux/iova_bitmap.h>
13 #include <uapi/linux/iommufd.h>
14 
/*
 * Forward declarations for types that this header refers to by pointer before
 * (or without) a full definition being visible.
 */
struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;
19 
/*
 * Per-context state. The helpers declared in this header receive it as
 * "ictx".
 */
struct iommufd_ctx {
	struct file *file;
	/* NOTE(review): presumably indexed by iommufd_object::id - confirm */
	struct xarray objects;
	struct xarray groups;
	/*
	 * Woken by iommufd_put_object() when an object's shortterm_users
	 * count drops to zero.
	 */
	wait_queue_head_t destroy_wait;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};
31 
32 /*
33  * The IOVA to PFN map. The map automatically copies the PFNs into multiple
34  * domains and permits sharing of PFNs between io_pagetable instances. This
35  * supports both a design where IOAS's are 1:1 with a domain (eg because the
36  * domain is HW customized), or where the IOAS is 1:N with multiple generic
37  * domains.  The io_pagetable holds an interval tree of iopt_areas which point
38  * to shared iopt_pages which hold the pfns mapped to the page table.
39  *
40  * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
41  */
struct io_pagetable {
	/* First lock in the locking order: taken before iova_rwsem */
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	/* Taken after domains_rwsem and before pages::mutex */
	struct rw_semaphore iova_rwsem;
	/* Interval tree of iopt_areas */
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	/*
	 * NOTE(review): presumably toggled by iopt_enable_large_pages() and
	 * iopt_disable_large_pages() - confirm.
	 */
	u8 disable_large_pages;
	unsigned long iova_alignment;
};
57 
/*
 * io_pagetable setup/teardown plus the page lookup, map and unmap entry
 * points. Only the declarations live in this header.
 */
void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
/*
 * NOTE(review): presumably a bit for the "flags" argument of the iopt_map_*()
 * functions below, which take their IOVA by pointer - confirm at the callers.
 */
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
/* Both unmap variants report a count through the "unmapped" out parameter */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
76 
/*
 * Dirty tracking helpers; each one operates on a single iommu_domain of an
 * io_pagetable. The definitions are outside this header.
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);
83 
/*
 * Access unmap notification, domain add/remove, allowed/reserved IOVA
 * management and large page control for an io_pagetable. Only the
 * declarations live in this header.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
/*
 * Reservations carry an opaque owner pointer; iopt_remove_reserved_iova()
 * takes the same kind of owner pointer to select what to remove.
 */
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
102 
/*
 * State for one userspace command. iommufd_ucmd_respond() copies from cmd to
 * ubuffer and never copies more than user_size bytes.
 */
struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	/* Userspace buffer that iommufd_ucmd_respond() writes the reply to */
	void __user *ubuffer;
	/* Upper bound on the number of bytes copied back to ubuffer */
	u32 user_size;
	/* Kernel-side command buffer; source of the reply copy */
	void *cmd;
};
109 
/*
 * NOTE(review): presumably the VFIO compatibility ioctl entry point; it is
 * defined outside this header - confirm.
 */
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);
112 
113 /* Copy the response in ucmd->cmd back to userspace. */
114 static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
115 				       size_t cmd_len)
116 {
117 	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
118 			 min_t(size_t, ucmd->user_size, cmd_len)))
119 		return -EFAULT;
120 	return 0;
121 }
122 
/*
 * Type tag stored in iommufd_object::type and passed to iommufd_get_object()
 * and _iommufd_object_alloc().
 */
enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	/* Alias of IOMMUFD_OBJ_NONE, so both have the value 0 */
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
	/* Only present in selftest builds, which shifts IOMMUFD_OBJ_MAX by one */
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
	/* One past the last valid type; must stay last */
	IOMMUFD_OBJ_MAX,
};
136 
/*
 * Base struct for all objects with a userspace ID handle.
 *
 * __iommufd_object_alloc() refuses, at build time, a containing struct whose
 * iommufd_object member is not at offset 0.
 */
struct iommufd_object {
	/*
	 * Taken by iommufd_lock_obj() after users and released by
	 * iommufd_put_object() after users.
	 */
	refcount_t shortterm_users;
	refcount_t users;
	enum iommufd_object_type type;
	unsigned int id;
};
144 
145 static inline bool iommufd_lock_obj(struct iommufd_object *obj)
146 {
147 	if (!refcount_inc_not_zero(&obj->users))
148 		return false;
149 	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
150 		/*
151 		 * If the caller doesn't already have a ref on obj this must be
152 		 * called under the xa_lock. Otherwise the caller is holding a
153 		 * ref on users. Thus it cannot be one before this decrement.
154 		 */
155 		refcount_dec(&obj->users);
156 		return false;
157 	}
158 	return true;
159 }
160 
/*
 * Look up an object of the given type by id. The typed inline getters in this
 * header pass the result straight into container_of().
 * NOTE(review): presumably returns an ERR_PTR on failure and otherwise holds
 * the references that iommufd_put_object() releases - confirm at the
 * definition.
 */
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
163 static inline void iommufd_put_object(struct iommufd_ctx *ictx,
164 				      struct iommufd_object *obj)
165 {
166 	/*
167 	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
168 	 * a spurious !0 users with a 0 shortterm_users.
169 	 */
170 	refcount_dec(&obj->users);
171 	if (refcount_dec_and_test(&obj->shortterm_users))
172 		wake_up_interruptible_all(&ictx->destroy_wait);
173 }
174 
/*
 * Object lifecycle helpers defined outside this header.
 * NOTE(review): judging by the names, abort undoes an allocation that was
 * never finalized, abort_and_destroy additionally runs the destroy path, and
 * finalize publishes the object - confirm at the definitions.
 */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);
180 
/*
 * Flag for the "flags" argument of iommufd_object_remove().
 * iommufd_object_destroy_user() passes REMOVE_WAIT_SHORTTERM while
 * iommufd_object_put_and_try_destroy() passes 0.
 */
enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
/*
 * Both inline callers in this header pass the object together with its own
 * obj->id and treat a non-zero return as failure or ignore it.
 */
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);
187 
188 /*
189  * The caller holds a users refcount and wants to destroy the object. At this
190  * point the caller has no shortterm_users reference and at least the xarray
191  * will be holding one.
192  */
193 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
194 					       struct iommufd_object *obj)
195 {
196 	int ret;
197 
198 	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
199 
200 	/*
201 	 * If there is a bug and we couldn't destroy the object then we did put
202 	 * back the caller's users refcount and will eventually try to free it
203 	 * again during close.
204 	 */
205 	WARN_ON(ret);
206 }
207 
208 /*
209  * The HWPT allocated by autodomains is used in possibly many devices and
210  * is automatically destroyed when its refcount reaches zero.
211  *
212  * If userspace uses the HWPT manually, even for a short term, then it will
213  * disrupt this refcounting and the auto-free in the kernel will not work.
214  * Userspace that tries to use the automatically allocated HWPT must be careful
215  * to ensure that it is consistently destroyed, eg by not racing accesses
216  * and by not attaching an automatic HWPT to a device manually.
217  */
218 static inline void
219 iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
220 				   struct iommufd_object *obj)
221 {
222 	iommufd_object_remove(ictx, obj, obj->id, 0);
223 }
224 
/*
 * Backend of the __iommufd_object_alloc() macro, which calls it with the
 * sizeof() of the containing struct and the object type. Defined outside this
 * header.
 */
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);
228 
/*
 * Allocate the struct that ptr points to and return it typed as that struct.
 *
 * The obj argument names the struct iommufd_object member inside *ptr.
 * BUILD_BUG_ON_ZERO() breaks the build unless that member sits at offset 0;
 * otherwise it contributes 0, so the requested size stays sizeof(*(ptr)).
 * NOTE(review): with the member at offset 0 container_of() leaves the pointer
 * value unchanged, which presumably lets an error pointer from
 * _iommufd_object_alloc() pass through intact - confirm.
 */
#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

/* Variant for structs whose iommufd_object member is literally named "obj" */
#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)
240 
/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	/* The IOVA -> PFN map backing this IOAS */
	struct io_pagetable iopt;
	/* Asserted not held by iommufd_hw_pagetable_put() for paging HWPTs */
	struct mutex mutex;
	/* List head; entries are iommufd_hwpt_paging::hwpt_item */
	struct list_head hwpt_list;
};
261 
262 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
263 						    u32 id)
264 {
265 	return container_of(iommufd_get_object(ictx, id,
266 					       IOMMUFD_OBJ_IOAS),
267 			    struct iommufd_ioas, obj);
268 }
269 
/*
 * IOAS object helpers and command handlers, defined outside this header. The
 * handlers receive the iommufd_ucmd describing the userspace request.
 * NOTE(review): the int returns are presumably 0 or a negative errno -
 * confirm at the definitions.
 */
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
285 
/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 *
 * This is the common part embedded as the first member ("common") of both
 * struct iommufd_hwpt_paging and struct iommufd_hwpt_nested.
 */
struct iommufd_hw_pagetable {
	/* obj.type tells the paging and nested variants apart */
	struct iommufd_object obj;
	struct iommu_domain *domain;
};
296 
/*
 * Paging HWPT: the common HWPT plus the IOAS it belongs to. Objects of this
 * kind carry IOMMUFD_OBJ_HWPT_PAGING in common.obj.type.
 */
struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	/*
	 * When set, iommufd_hw_pagetable_put() releases the HWPT through
	 * iommufd_object_put_and_try_destroy() instead of a plain refcount_dec().
	 */
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};
307 
/*
 * Nested HWPT: the common HWPT plus a pointer to a paging HWPT.
 * NOTE(review): parent is presumably a paging HWPT allocated with nest_parent
 * set - confirm at the allocation site.
 */
struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
};
312 
313 static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
314 {
315 	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
316 }
317 
318 static inline struct iommufd_hwpt_paging *
319 to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
320 {
321 	return container_of(hwpt, struct iommufd_hwpt_paging, common);
322 }
323 
324 static inline struct iommufd_hwpt_paging *
325 iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
326 {
327 	return container_of(iommufd_get_object(ucmd->ictx, id,
328 					       IOMMUFD_OBJ_HWPT_PAGING),
329 			    struct iommufd_hwpt_paging, common.obj);
330 }
331 
332 static inline struct iommufd_hw_pagetable *
333 iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
334 {
335 	return container_of(iommufd_get_object(ucmd->ictx, id,
336 					       IOMMUFD_OBJ_HWPT_NESTED),
337 			    struct iommufd_hw_pagetable, obj);
338 }
339 
/*
 * HWPT command handlers and helpers, defined outside this header.
 * NOTE(review): the int returns are presumably 0 or a negative errno -
 * confirm at the definitions.
 */
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
/* Separate destroy and abort callbacks exist for each HWPT object type */
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
358 
359 static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
360 					    struct iommufd_hw_pagetable *hwpt)
361 {
362 	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
363 		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
364 
365 		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
366 
367 		if (hwpt_paging->auto_domain) {
368 			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
369 			return;
370 		}
371 	}
372 	refcount_dec(&hwpt->obj.users);
373 }
374 
/*
 * iommufd's bookkeeping for one iommu_group within a context. It is
 * kref-counted and referenced from iommufd_device::igroup.
 */
struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	/* NOTE(review): presumably links iommufd_device::group_item - confirm */
	struct list_head device_list;
	/*
	 * Same type as the sw_msi_start out parameter of
	 * iopt_table_enforce_dev_resv_regions().
	 */
	phys_addr_t sw_msi_start;
};
384 
/*
 * A iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	/* NOTE(review): presumably an entry in iommufd_group::device_list - confirm */
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
};
399 
400 static inline struct iommufd_device *
401 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
402 {
403 	return container_of(iommufd_get_object(ucmd->ictx, id,
404 					       IOMMUFD_OBJ_DEVICE),
405 			    struct iommufd_device, obj);
406 }
407 
/*
 * Device object destroy callback and the hardware info command handler; both
 * are defined outside this header.
 */
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
410 
/*
 * Object of type IOMMUFD_OBJ_ACCESS.
 * NOTE(review): presumably represents an in-kernel user of an IOAS driven
 * through the ops callbacks - confirm against the access implementation.
 */
struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	/* Opaque pointer; NOTE(review): presumably passed back to ops - confirm */
	void *data;
	unsigned long iova_alignment;
	/*
	 * Same type and name as the id argument of iopt_remove_access();
	 * NOTE(review): presumably an index into io_pagetable::access_list.
	 */
	u32 iopt_access_list_id;
};
422 
/*
 * Register/unregister an access with an io_pagetable, plus the destroy
 * callback for access objects. All are defined outside this header.
 */
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
428 
/*
 * Selftest hooks. With CONFIG_IOMMUFD_TEST the real implementations are
 * declared here. Without it, inline stubs take their place:
 * iommufd_should_fail() and iommufd_selftest_is_mock_dev() return false,
 * iommufd_test_init() returns 0 and the remaining stubs do nothing.
 */
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
/*
 * iommufd_test(), iommufd_selftest_destroy() and iommufd_test_memory_limit
 * have no stub, so any user of them must itself be conditional on
 * CONFIG_IOMMUFD_TEST.
 */
/* Leaves *iova and *flags untouched */
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
461 #endif
462