/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H

#include <linux/rwsem.h>
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
#include <linux/iommu.h>
#include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h>

struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;

struct iommufd_ctx {
	struct file *file;
	struct xarray objects;
	struct xarray groups;
	wait_queue_head_t destroy_wait;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};

/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized), or where the IOAS is 1:N with multiple generic
 * domains. The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the pfns mapped to the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	struct rw_semaphore iova_rwsem;
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};

void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);

void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
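
/*
 * Illustrative sketch, not part of this header: a minimal map/unmap cycle
 * against an io_pagetable. With IOPT_ALLOC_IOVA the allocator chooses the
 * IOVA and returns it through @iova; without it the caller supplies a fixed
 * IOVA. "uptr" and "length" stand in for hypothetical caller state and error
 * handling is reduced to the essentials:
 *
 *	unsigned long iova;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (rc)
 *		return rc;
 *	// ... the range [iova, iova + length) is now mapped ...
 *	rc = iopt_unmap_iova(iopt, iova, length, NULL);
 */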

struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	void __user *ubuffer;
	u32 user_size;
	void *cmd;
};

int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);

/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
				       size_t cmd_len)
{
	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
			 min_t(size_t, ucmd->user_size, cmd_len)))
		return -EFAULT;
	return 0;
}

enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
	IOMMUFD_OBJ_MAX,
};

/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
	refcount_t shortterm_users;
	refcount_t users;
	enum iommufd_object_type type;
	unsigned int id;
};

static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
	if (!refcount_inc_not_zero(&obj->users))
		return false;
	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
		/*
		 * If the caller doesn't already have a ref on obj this must be
		 * called under the xa_lock. Otherwise the caller is holding a
		 * ref on users. Thus it cannot be one before this decrement.
		 */
		refcount_dec(&obj->users);
		return false;
	}
	return true;
}

struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	/*
	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
	 * a spurious !0 users with a 0 shortterm_users.
	 */
	refcount_dec(&obj->users);
	if (refcount_dec_and_test(&obj->shortterm_users))
		wake_up_interruptible_all(&ictx->destroy_wait);
}

void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);

/*
 * The caller holds a users refcount and wants to destroy the object. At this
 * point the caller has no shortterm_users reference and at least the xarray
 * will be holding one.
 */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
					       struct iommufd_object *obj)
{
	int ret;

	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);

	/*
	 * If there is a bug and we couldn't destroy the object then we did put
	 * back the caller's users refcount and will eventually try to free it
	 * again during close.
	 */
	WARN_ON(ret);
}

/*
 * The HWPT allocated by autodomains is used in possibly many devices and
 * is automatically destroyed when its refcount reaches zero.
 *
 * If userspace uses the HWPT manually, even for a short term, then it will
 * disrupt this refcounting and the auto-free in the kernel will not work.
 * Userspace that tries to use the automatically allocated HWPT must be careful
 * to ensure that it is consistently destroyed, eg by not racing accesses
 * and by not attaching an automatic HWPT to a device manually.
 */
static inline void
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
				   struct iommufd_object *obj)
{
	iommufd_object_remove(ictx, obj, obj->id, 0);
}
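
/*
 * Illustrative sketch (an assumption about the usual calling pattern, not a
 * contract defined here): lookups pair iommufd_get_object() with
 * iommufd_put_object(). The getter returns an ERR_PTR on failure and
 * otherwise holds both a users and a shortterm_users reference until the
 * put; "id" is a hypothetical handle supplied by userspace:
 *
 *	struct iommufd_object *obj;
 *
 *	obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	// ... use the object while the references pin it ...
 *	iommufd_put_object(ictx, obj);
 */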

struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);

#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)

/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	struct mutex mutex;
	struct list_head hwpt_list;
};

static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
						    u32 id)
{
	return container_of(iommufd_get_object(ictx, id,
					       IOMMUFD_OBJ_IOAS),
			    struct iommufd_ioas, obj);
}

struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
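
/*
 * Illustrative sketch of the allocation flow implied by the helpers above
 * (an assumption, loosely mirroring iommufd_ioas_alloc()):
 * iommufd_object_alloc() reserves the ID, the caller initializes the
 * embedded state, and iommufd_object_finalize() publishes the object to
 * userspace. A failure between the two steps is unwound with
 * iommufd_object_abort() instead:
 *
 *	struct iommufd_ioas *ioas;
 *
 *	ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
 *	if (IS_ERR(ioas))
 *		return PTR_ERR(ioas);
 *	iopt_init_table(&ioas->iopt);
 *	INIT_LIST_HEAD(&ioas->hwpt_list);
 *	mutex_init(&ioas->mutex);
 *	iommufd_object_finalize(ictx, &ioas->obj);
 */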

/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
};

struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};

struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
};

static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{
	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
}

static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_paging, common);
}

static inline struct iommufd_hwpt_paging *
iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_PAGING),
			    struct iommufd_hwpt_paging, common.obj);
}

static inline struct iommufd_hw_pagetable *
iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_NESTED),
			    struct iommufd_hw_pagetable, obj);
}

int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);

static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
					    struct iommufd_hw_pagetable *hwpt)
{
	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);

		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);

		if (hwpt_paging->auto_domain) {
			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
			return;
		}
	}
	refcount_dec(&hwpt->obj.users);
}

struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};
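
/*
 * Illustrative sketch (assumption): code handed a generic HWPT checks the
 * object type before downcasting, the same dispatch used by
 * iommufd_hw_pagetable_put() above. "hwpt" stands in for a pointer obtained
 * from an earlier lookup:
 *
 *	if (hwpt_is_paging(hwpt)) {
 *		struct iommufd_hwpt_paging *paging = to_hwpt_paging(hwpt);
 *
 *		// paging-only state such as paging->ioas is valid here
 *	}
 */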

/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
};

static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_DEVICE),
			    struct iommufd_device, obj);
}

void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);

struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	u32 iopt_access_list_id;
};

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);

#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
#endif