/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H

#include <linux/iommu.h>
#include <linux/iova_bitmap.h>
#include <linux/refcount.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"

struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;

struct iommufd_ctx {
	struct file *file;
	struct xarray objects;
	struct xarray groups;
	wait_queue_head_t destroy_wait;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};

/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized) and one where the IOAS is 1:N with multiple generic
 * domains. The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the pfns mapped to the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	struct rw_semaphore iova_rwsem;
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};

void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);

void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
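
/*
 * Illustrative sketch only, not used by the driver: one way a caller could
 * use the io_pagetable API above to map a user buffer at a kernel-chosen
 * IOVA and then tear the mapping down again. The function name and the
 * minimal error handling are assumptions made for the example.
 */
static inline int iommufd_example_map_then_unmap(struct iommufd_ctx *ictx,
						 struct io_pagetable *iopt,
						 void __user *uptr,
						 unsigned long length)
{
	unsigned long iova;
	unsigned long unmapped;
	int rc;

	/* IOPT_ALLOC_IOVA asks the io_pagetable to pick a free IOVA */
	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
	if (rc)
		return rc;

	/* unmapped reports how many bytes were actually removed */
	return iopt_unmap_iova(iopt, iova, length, &unmapped);
}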
struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	void __user *ubuffer;
	u32 user_size;
	void *cmd;
};

int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);

/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
				       size_t cmd_len)
{
	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
			 min_t(size_t, ucmd->user_size, cmd_len)))
		return -EFAULT;
	return 0;
}

enum iommufd_object_type {
	IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
	IOMMUFD_OBJ_DEVICE,
	IOMMUFD_OBJ_HWPT_PAGING,
	IOMMUFD_OBJ_HWPT_NESTED,
	IOMMUFD_OBJ_IOAS,
	IOMMUFD_OBJ_ACCESS,
	IOMMUFD_OBJ_FAULT,
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
	IOMMUFD_OBJ_MAX,
};

/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
	refcount_t shortterm_users;
	refcount_t users;
	enum iommufd_object_type type;
	unsigned int id;
};

static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
	if (!refcount_inc_not_zero(&obj->users))
		return false;
	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
		/*
		 * If the caller doesn't already have a ref on obj this must be
		 * called under the xa_lock. Otherwise the caller is holding a
		 * ref on users, so users cannot be 1 before this decrement.
		 */
		refcount_dec(&obj->users);
		return false;
	}
	return true;
}

struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	/*
	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
	 * a spurious !0 users with a 0 shortterm_users.
	 */
	refcount_dec(&obj->users);
	if (refcount_dec_and_test(&obj->shortterm_users))
		wake_up_interruptible_all(&ictx->destroy_wait);
}

void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);

/*
 * The caller holds a users refcount and wants to destroy the object. At this
 * point the caller has no shortterm_users reference and at least the xarray
 * will be holding one.
 */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
					       struct iommufd_object *obj)
{
	int ret;

	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);

	/*
	 * If there is a bug and we couldn't destroy the object then we did put
	 * back the caller's users refcount and will eventually try to free it
	 * again during close.
	 */
	WARN_ON(ret);
}
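
/*
 * Illustrative sketch only, not used by the driver: the usual lookup pattern
 * for objects with a userspace ID. iommufd_get_object() takes both refcounts
 * via iommufd_lock_obj() and is assumed here to return an ERR_PTR on failure;
 * iommufd_put_object() drops them again once the caller is done. The function
 * name is an assumption made for the example.
 */
static inline int iommufd_example_touch_object(struct iommufd_ctx *ictx,
					       u32 id)
{
	struct iommufd_object *obj;

	obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... use obj while users and shortterm_users are held ... */

	iommufd_put_object(ictx, obj);
	return 0;
}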

/*
 * The HWPT allocated by autodomains is used in possibly many devices and
 * is automatically destroyed when its refcount reaches zero.
 *
 * If userspace uses the HWPT manually, even for a short term, then it will
 * disrupt this refcounting and the auto-free in the kernel will not work.
 * Userspace that tries to use the automatically allocated HWPT must be careful
 * to ensure that it is consistently destroyed, eg by not racing accesses
 * and by not attaching an automatic HWPT to a device manually.
 */
static inline void
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
				   struct iommufd_object *obj)
{
	iommufd_object_remove(ictx, obj, obj->id, 0);
}

struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type);

#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)

/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	struct mutex mutex;
	struct list_head hwpt_list;
};

static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
						    u32 id)
{
	return container_of(iommufd_get_object(ictx, id,
					       IOMMUFD_OBJ_IOAS),
			    struct iommufd_ioas, obj);
}

struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
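
/*
 * Illustrative sketch only, not used by the driver: the allocate/finalize
 * pattern behind iommufd_object_alloc(). The new object gets an ID but stays
 * invisible to other threads until iommufd_object_finalize(); an error path
 * would call iommufd_object_abort() instead. The real IOAS creation path is
 * iommufd_ioas_alloc()/iommufd_ioas_alloc_ioctl(); the helper name below and
 * the skipped initialization are assumptions made for the example.
 */
static inline struct iommufd_ioas *
iommufd_example_ioas_create(struct iommufd_ctx *ictx)
{
	struct iommufd_ioas *ioas;

	ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
	if (IS_ERR(ioas))
		return ioas;

	/* ... initialize ioas->iopt, ioas->mutex and ioas->hwpt_list ... */

	iommufd_object_finalize(ictx, &ioas->obj);
	return ioas;
}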

/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have
 * kernel owned page tables will be associated with an iommufd_ioas that
 * provides the IOVA to PFN map.
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
	struct iommufd_fault *fault;
};

struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};

struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
};

static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{
	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
}

static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_paging, common);
}

static inline struct iommufd_hwpt_nested *
to_hwpt_nested(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_nested, common);
}

static inline struct iommufd_hwpt_paging *
find_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	switch (hwpt->obj.type) {
	case IOMMUFD_OBJ_HWPT_PAGING:
		return to_hwpt_paging(hwpt);
	case IOMMUFD_OBJ_HWPT_NESTED:
		return to_hwpt_nested(hwpt)->parent;
	default:
		return NULL;
	}
}

static inline struct iommufd_hwpt_paging *
iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_PAGING),
			    struct iommufd_hwpt_paging, common.obj);
}

static inline struct iommufd_hw_pagetable *
iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_NESTED),
			    struct iommufd_hw_pagetable, obj);
}

int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);

static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
					    struct iommufd_hw_pagetable *hwpt)
{
	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);

		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);

		if (hwpt_paging->auto_domain) {
			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
			return;
		}
	}
	refcount_dec(&hwpt->obj.users);
}

struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};
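
/*
 * Illustrative sketch only, not used by the driver: how code holding any
 * struct iommufd_hw_pagetable can reach the IOAS backing it, whether it was
 * handed a paging HWPT or a nested one (via its parent). The function name
 * is an assumption made for the example.
 */
static inline struct iommufd_ioas *
iommufd_example_hwpt_to_ioas(struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);

	/* Only paging HWPTs (directly or via a nested parent) have an IOAS */
	if (!hwpt_paging)
		return NULL;
	return hwpt_paging->ioas;
}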

/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};

static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_DEVICE),
			    struct iommufd_device, obj);
}

void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);

struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	u32 iopt_access_list_id;
};

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);

/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to userspace. These objects are created/destroyed by userspace and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct file *filep;

	/* The lists of outstanding faults, protected by the mutex below */
	struct mutex mutex;
	struct list_head deliver;
	struct xarray response;

	struct wait_queue_head wait_queue;
};

struct iommufd_attach_handle {
	struct iommu_attach_handle handle;
	struct iommufd_device *idev;
};

/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)
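
/*
 * Illustrative sketch only, not used by the driver: recovering the
 * iommufd_device from the generic iommu_attach_handle that the IOPF path
 * reports back. The function name is an assumption made for the example.
 */
static inline struct iommufd_device *
iommufd_example_handle_to_idev(struct iommu_attach_handle *handle)
{
	struct iommufd_attach_handle *ihandle = to_iommufd_handle(handle);

	return ihandle->idev;
}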

static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_FAULT),
			    struct iommufd_fault, obj);
}

int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);

int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);

static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (hwpt->fault)
		return iommufd_fault_domain_attach_dev(hwpt, idev);

	return iommu_attach_group(hwpt->domain, idev->igroup->group);
}

static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}

static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (old->fault || hwpt->fault)
		return iommufd_fault_domain_replace_dev(idev, hwpt, old);

	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}

#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
#endif