/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H

#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"

struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;

struct iommufd_ctx {
	struct file *file;
	struct xarray objects;
	struct xarray groups;
	wait_queue_head_t destroy_wait;
	struct rw_semaphore ioas_creation_lock;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};

/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports either a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized) or one where the IOAS is 1:N with multiple generic
 * domains. The io_pagetable holds an interval tree of iopt_areas which point
 * to shared iopt_pages which hold the pfns mapped to the page table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	struct rw_semaphore iova_rwsem;
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};

void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
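
/*
 * Illustrative sketch, not taken from the source: a typical caller of the
 * mapping API above lets the allocator pick the IOVA by passing
 * IOPT_ALLOC_IOVA, then tears the range down with iopt_unmap_iova(). The
 * locking and error handling of a real caller are omitted; "ictx", "iopt",
 * "uptr" and "length" are assumed to exist in the caller.
 *
 *	unsigned long unmapped;
 *	unsigned long iova = 0;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (!rc)
 *		rc = iopt_unmap_iova(iopt, iova, length, &unmapped);
 */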

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);

void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);

struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	void __user *ubuffer;
	u32 user_size;
	void *cmd;
};

int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);

/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
				       size_t cmd_len)
{
	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
			 min_t(size_t, ucmd->user_size, cmd_len)))
		return -EFAULT;
	return 0;
}

static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
	if (!refcount_inc_not_zero(&obj->users))
		return false;
	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
		/*
		 * If the caller doesn't already have a ref on obj this must be
		 * called under the xa_lock. Otherwise the caller is holding a
		 * ref on users. Thus it cannot be one before this decrement.
		 */
		refcount_dec(&obj->users);
		return false;
	}
	return true;
}

struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	/*
	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
	 * a spurious !0 users with a 0 shortterm_users.
	 */
	refcount_dec(&obj->users);
	if (refcount_dec_and_test(&obj->shortterm_users))
		wake_up_interruptible_all(&ictx->destroy_wait);
}

void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);

/*
 * The caller holds a users refcount and wants to destroy the object. At this
 * point the caller has no shortterm_users reference and at least the xarray
 * will be holding one.
 */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
					       struct iommufd_object *obj)
{
	int ret;

	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);

	/*
	 * If there is a bug and we couldn't destroy the object then we did put
	 * back the caller's users refcount and will eventually try to free it
	 * again during close.
	 */
	WARN_ON(ret);
}
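
/*
 * Illustrative sketch (hypothetical ioctl-side caller), not taken from the
 * source: the lookup pattern built from the helpers above. Acquiring the
 * object takes the users and shortterm_users references, which hold off
 * concurrent destruction until iommufd_put_object(). The usual kernel
 * ERR_PTR return convention is assumed for iommufd_get_object().
 *
 *	struct iommufd_object *obj;
 *
 *	obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_IOAS);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	... the object cannot be destroyed while it is held ...
 *	iommufd_put_object(ucmd->ictx, obj);
 */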

/*
 * The HWPT allocated by autodomains is used in possibly many devices and
 * is automatically destroyed when its refcount reaches zero.
 *
 * If userspace uses the HWPT manually, even for a short term, then it will
 * disrupt this refcounting and the auto-free in the kernel will not work.
 * Userspace that tries to use the automatically allocated HWPT must be careful
 * to ensure that it is consistently destroyed, eg by not racing accesses
 * and by not attaching an automatic HWPT to a device manually.
 */
static inline void
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
				   struct iommufd_object *obj)
{
	iommufd_object_remove(ictx, obj, obj->id, 0);
}

#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)

/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	struct mutex mutex;
	struct list_head hwpt_list;
};

static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
						    u32 id)
{
	return container_of(iommufd_get_object(ictx, id,
					       IOMMUFD_OBJ_IOAS),
			    struct iommufd_ioas, obj);
}

struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
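
/*
 * Illustrative sketch, not taken from the source: the general shape of an
 * IOAS ioctl handler built on iommufd_get_ioas(), iommufd_ucmd_respond()
 * and iommufd_put_object(). "cmd" stands for a hypothetical structure
 * pointed to by ucmd->cmd, and "cmd->ioas_id" is an assumed field; a real
 * handler validates the user-supplied flags and fields first.
 *
 *	struct iommufd_ioas *ioas;
 *	int rc;
 *
 *	ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
 *	if (IS_ERR(ioas))
 *		return PTR_ERR(ioas);
 *	... operate on ioas->iopt ...
 *	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 *	iommufd_put_object(ucmd->ictx, &ioas->obj);
 *	return rc;
 */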

/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have kernel
 * owned page tables will be associated with an iommufd_ioas that provides the
 * IOVA to PFN map.
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
	struct iommufd_fault *fault;
};

struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};

struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
	struct iommufd_viommu *viommu;
};

static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{
	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
}

static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_paging, common);
}

static inline struct iommufd_hwpt_nested *
to_hwpt_nested(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_nested, common);
}

static inline struct iommufd_hwpt_paging *
find_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	switch (hwpt->obj.type) {
	case IOMMUFD_OBJ_HWPT_PAGING:
		return to_hwpt_paging(hwpt);
	case IOMMUFD_OBJ_HWPT_NESTED:
		return to_hwpt_nested(hwpt)->parent;
	default:
		return NULL;
	}
}

static inline struct iommufd_hwpt_paging *
iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_PAGING),
			    struct iommufd_hwpt_paging, common.obj);
}

static inline struct iommufd_hw_pagetable *
iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_NESTED),
			    struct iommufd_hw_pagetable, obj);
}

int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);

struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);

static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
					    struct iommufd_hw_pagetable *hwpt)
{
	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);

		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);

		if (hwpt_paging->auto_domain) {
			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
			return;
		}
	}
	refcount_dec(&hwpt->obj.users);
}
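
/*
 * Illustrative sketch (hypothetical caller), not taken from the source:
 * find_hwpt_paging() lets code that may be handed either HWPT flavor reach
 * the backing IOAS. A nested HWPT goes through its paging parent, while a
 * paging HWPT holds the IOAS directly.
 *
 *	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
 *
 *	if (hwpt_paging)
 *		... hwpt_paging->ioas is the IOVA to PFN map backing hwpt ...
 */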

struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};

/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};

static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_DEVICE),
			    struct iommufd_device, obj);
}

void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);

struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	u32 iopt_access_list_id;
};

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);

/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to userspace. These objects are created/destroyed by userspace and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct file *filep;

	/* The lists of outstanding faults, protected by the mutex below */
	struct mutex mutex;
	struct list_head deliver;
	struct xarray response;

	struct wait_queue_head wait_queue;
};

struct iommufd_attach_handle {
	struct iommu_attach_handle handle;
	struct iommufd_device *idev;
};

/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
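
/*
 * Illustrative sketch (hypothetical fault-path caller), not taken from the
 * source: the attach handle reported by the core iommu layer is converted
 * back to the iommufd wrapper to recover the device an I/O page fault
 * belongs to. "core_handle" is an assumed local.
 *
 *	struct iommufd_attach_handle *handle = to_iommufd_handle(core_handle);
 *	struct iommufd_device *idev = handle->idev;
 */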

static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_FAULT),
			    struct iommufd_fault, obj);
}

int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);

int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);

static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (hwpt->fault)
		return iommufd_fault_domain_attach_dev(hwpt, idev);

	return iommu_attach_group(hwpt->domain, idev->igroup->group);
}

static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}

static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (old->fault || hwpt->fault)
		return iommufd_fault_domain_replace_dev(idev, hwpt, old);

	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}

static inline struct iommufd_viommu *
iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_VIOMMU),
			    struct iommufd_viommu, obj);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);

struct iommufd_vdevice {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_viommu *viommu;
	struct device *dev;
	u64 id; /* per-vIOMMU virtual ID */
};

#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif
#endif