// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2021 Intel Corporation
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 *
 * iommufd provides control over the IOMMU HW objects created by IOMMU kernel
 * drivers. IOMMU HW objects revolve around IO page tables that map incoming
 * DMA addresses (IOVA) to CPU addresses.
 */
#define pr_fmt(fmt) "iommufd: " fmt

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/bug.h>
#include <uapi/linux/iommufd.h>
#include <linux/iommufd.h>

#include "io_pagetable.h"
#include "iommufd_private.h"
#include "iommufd_test.h"

struct iommufd_object_ops {
	void (*destroy)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
static struct miscdevice vfio_misc_dev;

struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type)
{
	struct iommufd_object *obj;
	int rc;

	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
	if (!obj)
		return ERR_PTR(-ENOMEM);
	obj->type = type;
	init_rwsem(&obj->destroy_rwsem);
	refcount_set(&obj->users, 1);

	/*
	 * Reserve an ID in the xarray but do not publish the pointer yet since
	 * the caller has not finished initializing the object. Once the
	 * pointer is published in the xarray and visible to other threads we
	 * can't reliably destroy it anymore, so the caller must complete all
	 * errorable operations before calling iommufd_object_finalize().
	 */
	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
		      xa_limit_32b, GFP_KERNEL_ACCOUNT);
	if (rc)
		goto out_free;
	return obj;
out_free:
	kfree(obj);
	return ERR_PTR(rc);
}

/*
 * Allow concurrent access to the object.
 *
 * Once another thread can see the object pointer it can prevent object
 * destruction. Except for special kernel-only objects there is no in-kernel
 * way to reliably destroy a single object. Thus all APIs that are creating
 * objects must use iommufd_object_abort() to handle their errors and only call
 * iommufd_object_finalize() once object creation cannot fail.
 */
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj)
{
	void *old;

	old = xa_store(&ictx->objects, obj->id, obj, GFP_KERNEL);
	/* obj->id was returned from xa_alloc() so the xa_store() cannot fail */
	WARN_ON(old);
}

/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
{
	void *old;

	old = xa_erase(&ictx->objects, obj->id);
	WARN_ON(old);
	kfree(obj);
}

/*
 * Abort an object that has been fully initialized and needs destroy, but has
 * not been finalized.
 */
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	iommufd_object_ops[obj->type].destroy(obj);
	iommufd_object_abort(ictx, obj);
}

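/*
 * Look up an object by ID. On success the returned object has been locked
 * against destruction via iommufd_lock_obj() and has an elevated users count;
 * the caller must drop both when it is done with the object.
 */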
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type)
{
	struct iommufd_object *obj;

	if (iommufd_should_fail())
		return ERR_PTR(-ENOENT);

	xa_lock(&ictx->objects);
	obj = xa_load(&ictx->objects, id);
	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
	    !iommufd_lock_obj(obj))
		obj = ERR_PTR(-ENOENT);
	xa_unlock(&ictx->objects);
	return obj;
}

/*
 * The caller holds a users refcount and wants to destroy the object. Returns
 * true if the object was destroyed. In all cases the caller no longer has a
 * reference on obj.
 */
bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
				 struct iommufd_object *obj)
{
	/*
	 * The purpose of the destroy_rwsem is to ensure deterministic
	 * destruction of objects used by external drivers and destroyed by
	 * this function. Any temporary increment of the refcount must hold the
	 * read side of this, such as during ioctl execution.
	 */
	down_write(&obj->destroy_rwsem);
	xa_lock(&ictx->objects);
	refcount_dec(&obj->users);
	if (!refcount_dec_if_one(&obj->users)) {
		xa_unlock(&ictx->objects);
		up_write(&obj->destroy_rwsem);
		return false;
	}
	__xa_erase(&ictx->objects, obj->id);
	if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj)
		ictx->vfio_ioas = NULL;
	xa_unlock(&ictx->objects);
	up_write(&obj->destroy_rwsem);

	iommufd_object_ops[obj->type].destroy(obj);
	kfree(obj);
	return true;
}

static int iommufd_destroy(struct iommufd_ucmd *ucmd)
{
	struct iommu_destroy *cmd = ucmd->cmd;
	struct iommufd_object *obj;

	obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(obj))
		return PTR_ERR(obj);
	iommufd_ref_to_users(obj);
	/* See iommufd_ref_to_users() */
	if (!iommufd_object_destroy_user(ucmd->ictx, obj))
		return -EBUSY;
	return 0;
}

static int iommufd_fops_open(struct inode *inode, struct file *filp)
{
	struct iommufd_ctx *ictx;

	ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT);
	if (!ictx)
		return -ENOMEM;

	/*
	 * For compatibility with VFIO when /dev/vfio/vfio is opened we default
	 * to the same rlimit accounting as vfio uses.
	 */
	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) &&
	    filp->private_data == &vfio_misc_dev) {
		ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
		pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n");
	}

	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
	ictx->file = filp;
	filp->private_data = ictx;
	return 0;
}

static int iommufd_fops_release(struct inode *inode, struct file *filp)
{
	struct iommufd_ctx *ictx = filp->private_data;
	struct iommufd_object *obj;

	/*
	 * The objects in the xarray form a graph of "users" counts, and we
	 * have to destroy them in a depth first manner. Leaf objects will
	 * reduce the users count of interior objects when they are destroyed.
	 *
	 * Repeatedly destroying all the "1 users" leaf objects will progress
	 * until the entire list is destroyed. If this can't progress then
	 * there is some bug related to object refcounting.
	 */
	while (!xa_empty(&ictx->objects)) {
		unsigned int destroyed = 0;
		unsigned long index;

		xa_for_each(&ictx->objects, index, obj) {
			if (!refcount_dec_if_one(&obj->users))
				continue;
			destroyed++;
			xa_erase(&ictx->objects, index);
			iommufd_object_ops[obj->type].destroy(obj);
			kfree(obj);
		}
		/* Bug related to users refcount */
		if (WARN_ON(!destroyed))
			break;
	}
	kfree(ictx);
	return 0;
}

static int iommufd_option(struct iommufd_ucmd *ucmd)
{
	struct iommu_option *cmd = ucmd->cmd;
	int rc;

	if (cmd->__reserved)
		return -EOPNOTSUPP;

	switch (cmd->option_id) {
	case IOMMU_OPTION_RLIMIT_MODE:
		rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);
		break;
	case IOMMU_OPTION_HUGE_PAGES:
		rc = iommufd_ioas_option(ucmd);
		break;
	default:
		return -EOPNOTSUPP;
	}
	if (rc)
		return rc;
	if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64,
			 &cmd->val64, sizeof(cmd->val64)))
		return -EFAULT;
	return 0;
}

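/*
 * Scratch buffer large enough to hold any single ioctl payload. Each handler
 * works on a kernel copy of the user structure placed in here; the
 * BUILD_BUG_ON_ZERO() in IOCTL_OP() keeps this union in sync when new
 * commands are added.
 */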
union ucmd_buffer {
	struct iommu_destroy destroy;
	struct iommu_ioas_alloc alloc;
	struct iommu_ioas_allow_iovas allow_iovas;
	struct iommu_ioas_copy ioas_copy;
	struct iommu_ioas_iova_ranges iova_ranges;
	struct iommu_ioas_map map;
	struct iommu_ioas_unmap unmap;
	struct iommu_option option;
	struct iommu_vfio_ioas vfio_ioas;
#ifdef CONFIG_IOMMUFD_TEST
	struct iommu_test_cmd test;
#endif
};

struct iommufd_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct iommufd_ucmd *ucmd);
};

#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
	[_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = {                               \
		.size = sizeof(_struct) +                                      \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
					  sizeof(_struct)),                    \
		.min_size = offsetofend(_struct, _last),                       \
		.ioctl_num = _ioctl,                                           \
		.execute = _fn,                                                \
	}
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
	IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
		 struct iommu_ioas_alloc, out_ioas_id),
	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
		 struct iommu_ioas_allow_iovas, allowed_iovas),
	IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
		 src_iova),
	IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
		 struct iommu_ioas_iova_ranges, out_iova_alignment),
	IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map,
		 iova),
	IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
		 length),
	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option,
		 val64),
	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
		 __reserved),
#ifdef CONFIG_IOMMUFD_TEST
	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
#endif
};

static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
			       unsigned long arg)
{
	struct iommufd_ctx *ictx = filp->private_data;
	const struct iommufd_ioctl_op *op;
	struct iommufd_ucmd ucmd = {};
	union ucmd_buffer buf;
	unsigned int nr;
	int ret;

	nr = _IOC_NR(cmd);
	if (nr < IOMMUFD_CMD_BASE ||
	    (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
		return iommufd_vfio_ioctl(ictx, cmd, arg);

	ucmd.ictx = ictx;
	ucmd.ubuffer = (void __user *)arg;
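	/*
	 * Every iommufd ioctl structure begins with a u32 size field filled in
	 * by userspace. copy_struct_from_user() below zero-fills any kernel
	 * fields a shorter (older) userspace did not provide and rejects a
	 * larger (newer) structure whose trailing bytes are not zero, so the
	 * ABI can be extended in either direction.
	 */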
	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
	if (ret)
		return ret;

	op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE];
	if (op->ioctl_num != cmd)
		return -ENOIOCTLCMD;
	if (ucmd.user_size < op->min_size)
		return -EINVAL;

	ucmd.cmd = &buf;
	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
				    ucmd.user_size);
	if (ret)
		return ret;
	ret = op->execute(&ucmd);
	return ret;
}

static const struct file_operations iommufd_fops = {
	.owner = THIS_MODULE,
	.open = iommufd_fops_open,
	.release = iommufd_fops_release,
	.unlocked_ioctl = iommufd_fops_ioctl,
};

/**
 * iommufd_ctx_get - Get a context reference
 * @ictx: Context to get
 *
 * The caller must already hold a valid reference to ictx.
 */
void iommufd_ctx_get(struct iommufd_ctx *ictx)
{
	get_file(ictx->file);
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD);

/**
 * iommufd_ctx_from_file - Acquires a reference to the iommufd context
 * @file: File to obtain the reference from
 *
 * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file
 * remains owned by the caller and the caller must still do fput. On success
 * the caller is responsible to call iommufd_ctx_put().
 */
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{
	struct iommufd_ctx *ictx;

	if (file->f_op != &iommufd_fops)
		return ERR_PTR(-EBADFD);
	ictx = file->private_data;
	iommufd_ctx_get(ictx);
	return ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD);

/**
 * iommufd_ctx_put - Put back a reference
 * @ictx: Context to put back
 */
void iommufd_ctx_put(struct iommufd_ctx *ictx)
{
	fput(ictx->file);
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);

static const struct iommufd_object_ops iommufd_object_ops[] = {
	[IOMMUFD_OBJ_ACCESS] = {
		.destroy = iommufd_access_destroy_object,
	},
	[IOMMUFD_OBJ_DEVICE] = {
		.destroy = iommufd_device_destroy,
	},
	[IOMMUFD_OBJ_IOAS] = {
		.destroy = iommufd_ioas_destroy,
	},
	[IOMMUFD_OBJ_HW_PAGETABLE] = {
		.destroy = iommufd_hw_pagetable_destroy,
	},
#ifdef CONFIG_IOMMUFD_TEST
	[IOMMUFD_OBJ_SELFTEST] = {
		.destroy = iommufd_selftest_destroy,
	},
#endif
};

static struct miscdevice iommu_misc_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "iommu",
	.fops = &iommufd_fops,
	.nodename = "iommu",
	.mode = 0660,
};

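/*
 * When CONFIG_IOMMUFD_VFIO_CONTAINER is enabled iommufd also claims
 * /dev/vfio/vfio (VFIO_MINOR) and serves the legacy VFIO container ioctls
 * through iommufd_vfio_ioctl(), see the fallback in iommufd_fops_ioctl().
 */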
static struct miscdevice vfio_misc_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &iommufd_fops,
	.nodename = "vfio/vfio",
	.mode = 0666,
};

static int __init iommufd_init(void)
{
	int ret;

	ret = misc_register(&iommu_misc_dev);
	if (ret)
		return ret;

	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) {
		ret = misc_register(&vfio_misc_dev);
		if (ret)
			goto err_misc;
	}
	iommufd_test_init();
	return 0;
err_misc:
	misc_deregister(&iommu_misc_dev);
	return ret;
}

static void __exit iommufd_exit(void)
{
	iommufd_test_exit();
	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
		misc_deregister(&vfio_misc_dev);
	misc_deregister(&iommu_misc_dev);
}

module_init(iommufd_init);
module_exit(iommufd_exit);

#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
#endif
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");