1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 3 */ 4 #ifndef __IOMMUFD_PRIVATE_H 5 #define __IOMMUFD_PRIVATE_H 6 7 #include <linux/rwsem.h> 8 #include <linux/xarray.h> 9 #include <linux/refcount.h> 10 #include <linux/uaccess.h> 11 #include <linux/iommu.h> 12 #include <linux/iova_bitmap.h> 13 #include <uapi/linux/iommufd.h> 14 #include "../iommu-priv.h" 15 16 struct iommu_domain; 17 struct iommu_group; 18 struct iommu_option; 19 struct iommufd_device; 20 21 struct iommufd_ctx { 22 struct file *file; 23 struct xarray objects; 24 struct xarray groups; 25 wait_queue_head_t destroy_wait; 26 27 u8 account_mode; 28 /* Compatibility with VFIO no iommu */ 29 u8 no_iommu_mode; 30 struct iommufd_ioas *vfio_ioas; 31 }; 32 33 /* 34 * The IOVA to PFN map. The map automatically copies the PFNs into multiple 35 * domains and permits sharing of PFNs between io_pagetable instances. This 36 * supports both a design where IOAS's are 1:1 with a domain (eg because the 37 * domain is HW customized), or where the IOAS is 1:N with multiple generic 38 * domains. The io_pagetable holds an interval tree of iopt_areas which point 39 * to shared iopt_pages which hold the pfns mapped to the page table. 40 * 41 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex 42 */ 43 struct io_pagetable { 44 struct rw_semaphore domains_rwsem; 45 struct xarray domains; 46 struct xarray access_list; 47 unsigned int next_domain_id; 48 49 struct rw_semaphore iova_rwsem; 50 struct rb_root_cached area_itree; 51 /* IOVA that cannot become reserved, struct iopt_allowed */ 52 struct rb_root_cached allowed_itree; 53 /* IOVA that cannot be allocated, struct iopt_reserved */ 54 struct rb_root_cached reserved_itree; 55 u8 disable_large_pages; 56 unsigned long iova_alignment; 57 }; 58 59 void iopt_init_table(struct io_pagetable *iopt); 60 void iopt_destroy_table(struct io_pagetable *iopt); 61 int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, 62 unsigned long length, struct list_head *pages_list); 63 void iopt_free_pages_list(struct list_head *pages_list); 64 enum { 65 IOPT_ALLOC_IOVA = 1 << 0, 66 }; 67 int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, 68 unsigned long *iova, void __user *uptr, 69 unsigned long length, int iommu_prot, 70 unsigned int flags); 71 int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, 72 unsigned long length, unsigned long *dst_iova, 73 int iommu_prot, unsigned int flags); 74 int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, 75 unsigned long length, unsigned long *unmapped); 76 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); 77 78 int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, 79 struct iommu_domain *domain, 80 unsigned long flags, 81 struct iommu_hwpt_get_dirty_bitmap *bitmap); 82 int iopt_set_dirty_tracking(struct io_pagetable *iopt, 83 struct iommu_domain *domain, bool enable); 84 85 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, 86 unsigned long length); 87 int iopt_table_add_domain(struct io_pagetable *iopt, 88 struct iommu_domain *domain); 89 void iopt_table_remove_domain(struct io_pagetable *iopt, 90 struct iommu_domain *domain); 91 int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, 92 struct device *dev, 93 phys_addr_t *sw_msi_start); 94 int iopt_set_allow_iova(struct io_pagetable *iopt, 95 struct rb_root_cached *allowed_iova); 96 int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, 97 unsigned long last, void *owner); 98 void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner); 99 int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, 100 size_t num_iovas); 101 void iopt_enable_large_pages(struct io_pagetable *iopt); 102 int iopt_disable_large_pages(struct io_pagetable *iopt); 103 104 struct iommufd_ucmd { 105 struct iommufd_ctx *ictx; 106 void __user *ubuffer; 107 u32 user_size; 108 void *cmd; 109 }; 110 111 int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, 112 unsigned long arg); 113 114 /* Copy the response in ucmd->cmd back to userspace. */ 115 static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, 116 size_t cmd_len) 117 { 118 if (copy_to_user(ucmd->ubuffer, ucmd->cmd, 119 min_t(size_t, ucmd->user_size, cmd_len))) 120 return -EFAULT; 121 return 0; 122 } 123 124 enum iommufd_object_type { 125 IOMMUFD_OBJ_NONE, 126 IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, 127 IOMMUFD_OBJ_DEVICE, 128 IOMMUFD_OBJ_HWPT_PAGING, 129 IOMMUFD_OBJ_HWPT_NESTED, 130 IOMMUFD_OBJ_IOAS, 131 IOMMUFD_OBJ_ACCESS, 132 IOMMUFD_OBJ_FAULT, 133 #ifdef CONFIG_IOMMUFD_TEST 134 IOMMUFD_OBJ_SELFTEST, 135 #endif 136 IOMMUFD_OBJ_MAX, 137 }; 138 139 /* Base struct for all objects with a userspace ID handle. */ 140 struct iommufd_object { 141 refcount_t shortterm_users; 142 refcount_t users; 143 enum iommufd_object_type type; 144 unsigned int id; 145 }; 146 147 static inline bool iommufd_lock_obj(struct iommufd_object *obj) 148 { 149 if (!refcount_inc_not_zero(&obj->users)) 150 return false; 151 if (!refcount_inc_not_zero(&obj->shortterm_users)) { 152 /* 153 * If the caller doesn't already have a ref on obj this must be 154 * called under the xa_lock. Otherwise the caller is holding a 155 * ref on users. Thus it cannot be one before this decrement. 156 */ 157 refcount_dec(&obj->users); 158 return false; 159 } 160 return true; 161 } 162 163 struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, 164 enum iommufd_object_type type); 165 static inline void iommufd_put_object(struct iommufd_ctx *ictx, 166 struct iommufd_object *obj) 167 { 168 /* 169 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees 170 * a spurious !0 users with a 0 shortterm_users. 171 */ 172 refcount_dec(&obj->users); 173 if (refcount_dec_and_test(&obj->shortterm_users)) 174 wake_up_interruptible_all(&ictx->destroy_wait); 175 } 176 177 void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); 178 void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, 179 struct iommufd_object *obj); 180 void iommufd_object_finalize(struct iommufd_ctx *ictx, 181 struct iommufd_object *obj); 182 183 enum { 184 REMOVE_WAIT_SHORTTERM = 1, 185 }; 186 int iommufd_object_remove(struct iommufd_ctx *ictx, 187 struct iommufd_object *to_destroy, u32 id, 188 unsigned int flags); 189 190 /* 191 * The caller holds a users refcount and wants to destroy the object. At this 192 * point the caller has no shortterm_users reference and at least the xarray 193 * will be holding one. 194 */ 195 static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, 196 struct iommufd_object *obj) 197 { 198 int ret; 199 200 ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); 201 202 /* 203 * If there is a bug and we couldn't destroy the object then we did put 204 * back the caller's users refcount and will eventually try to free it 205 * again during close. 206 */ 207 WARN_ON(ret); 208 } 209 210 /* 211 * The HWPT allocated by autodomains is used in possibly many devices and 212 * is automatically destroyed when its refcount reaches zero. 213 * 214 * If userspace uses the HWPT manually, even for a short term, then it will 215 * disrupt this refcounting and the auto-free in the kernel will not work. 216 * Userspace that tries to use the automatically allocated HWPT must be careful 217 * to ensure that it is consistently destroyed, eg by not racing accesses 218 * and by not attaching an automatic HWPT to a device manually. 219 */ 220 static inline void 221 iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, 222 struct iommufd_object *obj) 223 { 224 iommufd_object_remove(ictx, obj, obj->id, 0); 225 } 226 227 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, 228 size_t size, 229 enum iommufd_object_type type); 230 231 #define __iommufd_object_alloc(ictx, ptr, type, obj) \ 232 container_of(_iommufd_object_alloc( \ 233 ictx, \ 234 sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ 235 offsetof(typeof(*(ptr)), \ 236 obj) != 0), \ 237 type), \ 238 typeof(*(ptr)), obj) 239 240 #define iommufd_object_alloc(ictx, ptr, type) \ 241 __iommufd_object_alloc(ictx, ptr, type, obj) 242 243 /* 244 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the 245 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The 246 * mapping is copied into all of the associated domains and made available to 247 * in-kernel users. 248 * 249 * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable 250 * object. When we go to attach a device to an IOAS we need to get an 251 * iommu_domain and wrapping iommufd_hw_pagetable for it. 252 * 253 * An iommu_domain & iommfd_hw_pagetable will be automatically selected 254 * for a device based on the hwpt_list. If no suitable iommu_domain 255 * is found a new iommu_domain will be created. 256 */ 257 struct iommufd_ioas { 258 struct iommufd_object obj; 259 struct io_pagetable iopt; 260 struct mutex mutex; 261 struct list_head hwpt_list; 262 }; 263 264 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx, 265 u32 id) 266 { 267 return container_of(iommufd_get_object(ictx, id, 268 IOMMUFD_OBJ_IOAS), 269 struct iommufd_ioas, obj); 270 } 271 272 struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx); 273 int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd); 274 void iommufd_ioas_destroy(struct iommufd_object *obj); 275 int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); 276 int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); 277 int iommufd_ioas_map(struct iommufd_ucmd *ucmd); 278 int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); 279 int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); 280 int iommufd_ioas_option(struct iommufd_ucmd *ucmd); 281 int iommufd_option_rlimit_mode(struct iommu_option *cmd, 282 struct iommufd_ctx *ictx); 283 284 int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); 285 int iommufd_check_iova_range(struct io_pagetable *iopt, 286 struct iommu_hwpt_get_dirty_bitmap *bitmap); 287 288 /* 289 * A HW pagetable is called an iommu_domain inside the kernel. This user object 290 * allows directly creating and inspecting the domains. Domains that have kernel 291 * owned page tables will be associated with an iommufd_ioas that provides the 292 * IOVA to PFN map. 293 */ 294 struct iommufd_hw_pagetable { 295 struct iommufd_object obj; 296 struct iommu_domain *domain; 297 struct iommufd_fault *fault; 298 }; 299 300 struct iommufd_hwpt_paging { 301 struct iommufd_hw_pagetable common; 302 struct iommufd_ioas *ioas; 303 bool auto_domain : 1; 304 bool enforce_cache_coherency : 1; 305 bool msi_cookie : 1; 306 bool nest_parent : 1; 307 /* Head at iommufd_ioas::hwpt_list */ 308 struct list_head hwpt_item; 309 }; 310 311 struct iommufd_hwpt_nested { 312 struct iommufd_hw_pagetable common; 313 struct iommufd_hwpt_paging *parent; 314 }; 315 316 static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) 317 { 318 return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING; 319 } 320 321 static inline struct iommufd_hwpt_paging * 322 to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) 323 { 324 return container_of(hwpt, struct iommufd_hwpt_paging, common); 325 } 326 327 static inline struct iommufd_hwpt_paging * 328 iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) 329 { 330 return container_of(iommufd_get_object(ucmd->ictx, id, 331 IOMMUFD_OBJ_HWPT_PAGING), 332 struct iommufd_hwpt_paging, common.obj); 333 } 334 335 static inline struct iommufd_hw_pagetable * 336 iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id) 337 { 338 return container_of(iommufd_get_object(ucmd->ictx, id, 339 IOMMUFD_OBJ_HWPT_NESTED), 340 struct iommufd_hw_pagetable, obj); 341 } 342 343 int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); 344 int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); 345 346 struct iommufd_hwpt_paging * 347 iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, 348 struct iommufd_device *idev, u32 flags, 349 bool immediate_attach, 350 const struct iommu_user_data *user_data); 351 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, 352 struct iommufd_device *idev); 353 struct iommufd_hw_pagetable * 354 iommufd_hw_pagetable_detach(struct iommufd_device *idev); 355 void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); 356 void iommufd_hwpt_paging_abort(struct iommufd_object *obj); 357 void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); 358 void iommufd_hwpt_nested_abort(struct iommufd_object *obj); 359 int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); 360 int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd); 361 362 static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, 363 struct iommufd_hw_pagetable *hwpt) 364 { 365 if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) { 366 struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt); 367 368 lockdep_assert_not_held(&hwpt_paging->ioas->mutex); 369 370 if (hwpt_paging->auto_domain) { 371 iommufd_object_put_and_try_destroy(ictx, &hwpt->obj); 372 return; 373 } 374 } 375 refcount_dec(&hwpt->obj.users); 376 } 377 378 struct iommufd_group { 379 struct kref ref; 380 struct mutex lock; 381 struct iommufd_ctx *ictx; 382 struct iommu_group *group; 383 struct iommufd_hw_pagetable *hwpt; 384 struct list_head device_list; 385 phys_addr_t sw_msi_start; 386 }; 387 388 /* 389 * A iommufd_device object represents the binding relationship between a 390 * consuming driver and the iommufd. These objects are created/destroyed by 391 * external drivers, not by userspace. 392 */ 393 struct iommufd_device { 394 struct iommufd_object obj; 395 struct iommufd_ctx *ictx; 396 struct iommufd_group *igroup; 397 struct list_head group_item; 398 /* always the physical device */ 399 struct device *dev; 400 bool enforce_cache_coherency; 401 /* protect iopf_enabled counter */ 402 struct mutex iopf_lock; 403 unsigned int iopf_enabled; 404 }; 405 406 static inline struct iommufd_device * 407 iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) 408 { 409 return container_of(iommufd_get_object(ucmd->ictx, id, 410 IOMMUFD_OBJ_DEVICE), 411 struct iommufd_device, obj); 412 } 413 414 void iommufd_device_destroy(struct iommufd_object *obj); 415 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); 416 417 struct iommufd_access { 418 struct iommufd_object obj; 419 struct iommufd_ctx *ictx; 420 struct iommufd_ioas *ioas; 421 struct iommufd_ioas *ioas_unpin; 422 struct mutex ioas_lock; 423 const struct iommufd_access_ops *ops; 424 void *data; 425 unsigned long iova_alignment; 426 u32 iopt_access_list_id; 427 }; 428 429 int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); 430 void iopt_remove_access(struct io_pagetable *iopt, 431 struct iommufd_access *access, 432 u32 iopt_access_list_id); 433 void iommufd_access_destroy_object(struct iommufd_object *obj); 434 435 /* 436 * An iommufd_fault object represents an interface to deliver I/O page faults 437 * to the user space. These objects are created/destroyed by the user space and 438 * associated with hardware page table objects during page-table allocation. 439 */ 440 struct iommufd_fault { 441 struct iommufd_object obj; 442 struct iommufd_ctx *ictx; 443 struct file *filep; 444 445 /* The lists of outstanding faults protected by below mutex. */ 446 struct mutex mutex; 447 struct list_head deliver; 448 struct xarray response; 449 450 struct wait_queue_head wait_queue; 451 }; 452 453 struct iommufd_attach_handle { 454 struct iommu_attach_handle handle; 455 struct iommufd_device *idev; 456 }; 457 458 /* Convert an iommu attach handle to iommufd handle. */ 459 #define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) 460 461 static inline struct iommufd_fault * 462 iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) 463 { 464 return container_of(iommufd_get_object(ucmd->ictx, id, 465 IOMMUFD_OBJ_FAULT), 466 struct iommufd_fault, obj); 467 } 468 469 int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); 470 void iommufd_fault_destroy(struct iommufd_object *obj); 471 int iommufd_fault_iopf_handler(struct iopf_group *group); 472 473 int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, 474 struct iommufd_device *idev); 475 void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, 476 struct iommufd_device *idev); 477 int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, 478 struct iommufd_hw_pagetable *hwpt, 479 struct iommufd_hw_pagetable *old); 480 481 static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, 482 struct iommufd_device *idev) 483 { 484 if (hwpt->fault) 485 return iommufd_fault_domain_attach_dev(hwpt, idev); 486 487 return iommu_attach_group(hwpt->domain, idev->igroup->group); 488 } 489 490 static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, 491 struct iommufd_device *idev) 492 { 493 if (hwpt->fault) 494 iommufd_fault_domain_detach_dev(hwpt, idev); 495 496 iommu_detach_group(hwpt->domain, idev->igroup->group); 497 } 498 499 static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, 500 struct iommufd_hw_pagetable *hwpt, 501 struct iommufd_hw_pagetable *old) 502 { 503 if (old->fault || hwpt->fault) 504 return iommufd_fault_domain_replace_dev(idev, hwpt, old); 505 506 return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); 507 } 508 509 #ifdef CONFIG_IOMMUFD_TEST 510 int iommufd_test(struct iommufd_ucmd *ucmd); 511 void iommufd_selftest_destroy(struct iommufd_object *obj); 512 extern size_t iommufd_test_memory_limit; 513 void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, 514 unsigned int ioas_id, u64 *iova, u32 *flags); 515 bool iommufd_should_fail(void); 516 int __init iommufd_test_init(void); 517 void iommufd_test_exit(void); 518 bool iommufd_selftest_is_mock_dev(struct device *dev); 519 #else 520 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, 521 unsigned int ioas_id, 522 u64 *iova, u32 *flags) 523 { 524 } 525 static inline bool iommufd_should_fail(void) 526 { 527 return false; 528 } 529 static inline int __init iommufd_test_init(void) 530 { 531 return 0; 532 } 533 static inline void iommufd_test_exit(void) 534 { 535 } 536 static inline bool iommufd_selftest_is_mock_dev(struct device *dev) 537 { 538 return false; 539 } 540 #endif 541 #endif 542