1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VFIO core
4 *
5 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
6 * Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * Derived from original vfio:
9 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
10 * Author: Tom Lyon, pugs@cisco.com
11 */
12
13 #include <linux/cdev.h>
14 #include <linux/compat.h>
15 #include <linux/device.h>
16 #include <linux/fs.h>
17 #include <linux/idr.h>
18 #include <linux/iommu.h>
19 #if IS_ENABLED(CONFIG_KVM)
20 #include <linux/kvm_host.h>
21 #endif
22 #include <linux/list.h>
23 #include <linux/miscdevice.h>
24 #include <linux/module.h>
25 #include <linux/mount.h>
26 #include <linux/mutex.h>
27 #include <linux/pci.h>
28 #include <linux/pseudo_fs.h>
29 #include <linux/rwsem.h>
30 #include <linux/sched.h>
31 #include <linux/seq_file.h>
32 #include <linux/slab.h>
33 #include <linux/stat.h>
34 #include <linux/string.h>
35 #include <linux/uaccess.h>
36 #include <linux/vfio.h>
37 #include <linux/wait.h>
38 #include <linux/sched/signal.h>
39 #include <linux/pm_runtime.h>
40 #include <linux/interval_tree.h>
41 #include <linux/iova_bitmap.h>
42 #include <linux/iommufd.h>
43 #include "vfio.h"
44
45 #define DRIVER_VERSION "0.3"
46 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
47 #define DRIVER_DESC "VFIO - User Level meta-driver"
48
49 #define VFIO_MAGIC 0x5646494f /* "VFIO" */
50
/* Global vfio core state, singleton for the module */
static struct vfio {
	struct class *device_class;	/* class backing the vfio char devices */
	struct ida device_ida;		/* allocator for device->index (bounded by MINORMASK) */
	struct vfsmount *vfs_mount;	/* pseudo-fs mount pinned by vfio_fs_inode_new() */
	int fs_count;			/* pin count paired with vfs_mount */
} vfio;
57
58 #ifdef CONFIG_VFIO_NOIOMMU
59 bool vfio_noiommu __read_mostly;
60 module_param_named(enable_unsafe_noiommu_mode,
61 vfio_noiommu, bool, S_IRUGO | S_IWUSR);
62 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
63 #endif
64
65 static DEFINE_XARRAY(vfio_device_set_xa);
66
vfio_assign_device_set(struct vfio_device * device,void * set_id)67 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
68 {
69 unsigned long idx = (unsigned long)set_id;
70 struct vfio_device_set *new_dev_set;
71 struct vfio_device_set *dev_set;
72
73 if (WARN_ON(!set_id))
74 return -EINVAL;
75
76 /*
77 * Atomically acquire a singleton object in the xarray for this set_id
78 */
79 xa_lock(&vfio_device_set_xa);
80 dev_set = xa_load(&vfio_device_set_xa, idx);
81 if (dev_set)
82 goto found_get_ref;
83 xa_unlock(&vfio_device_set_xa);
84
85 new_dev_set = kzalloc_obj(*new_dev_set);
86 if (!new_dev_set)
87 return -ENOMEM;
88 mutex_init(&new_dev_set->lock);
89 INIT_LIST_HEAD(&new_dev_set->device_list);
90 new_dev_set->set_id = set_id;
91
92 xa_lock(&vfio_device_set_xa);
93 dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
94 GFP_KERNEL);
95 if (!dev_set) {
96 dev_set = new_dev_set;
97 goto found_get_ref;
98 }
99
100 kfree(new_dev_set);
101 if (xa_is_err(dev_set)) {
102 xa_unlock(&vfio_device_set_xa);
103 return xa_err(dev_set);
104 }
105
106 found_get_ref:
107 dev_set->device_count++;
108 xa_unlock(&vfio_device_set_xa);
109 mutex_lock(&dev_set->lock);
110 device->dev_set = dev_set;
111 list_add_tail(&device->dev_set_list, &dev_set->device_list);
112 mutex_unlock(&dev_set->lock);
113 return 0;
114 }
115 EXPORT_SYMBOL_GPL(vfio_assign_device_set);
116
/*
 * Undo vfio_assign_device_set(): unlink @device from its set and destroy
 * the set when the last member is gone.  Safe to call on a device that
 * was never assigned to a set.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* xa_lock serializes device_count against vfio_assign_device_set() */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
137
vfio_device_set_open_count(struct vfio_device_set * dev_set)138 unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
139 {
140 struct vfio_device *cur;
141 unsigned int open_count = 0;
142
143 lockdep_assert_held(&dev_set->lock);
144
145 list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
146 open_count += cur->open_count;
147 return open_count;
148 }
149 EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
150
151 struct vfio_device *
vfio_find_device_in_devset(struct vfio_device_set * dev_set,struct device * dev)152 vfio_find_device_in_devset(struct vfio_device_set *dev_set,
153 struct device *dev)
154 {
155 struct vfio_device *cur;
156
157 lockdep_assert_held(&dev_set->lock);
158
159 list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
160 if (cur->dev == dev)
161 return cur;
162 return NULL;
163 }
164 EXPORT_SYMBOL_GPL(vfio_find_device_in_devset);
165
166 /*
167 * Device objects - create, release, get, put, search
168 */
169 /* Device reference always implies a group reference */
vfio_device_put_registration(struct vfio_device * device)170 void vfio_device_put_registration(struct vfio_device *device)
171 {
172 if (refcount_dec_and_test(&device->refcount))
173 complete(&device->comp);
174 }
175 EXPORT_SYMBOL_GPL(vfio_device_put_registration);
176
/*
 * Try to take a registration reference on @device.  Returns false once
 * unregistration has dropped the initial reference set in the register
 * path, preventing new users from appearing during teardown.
 */
bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
EXPORT_SYMBOL_GPL(vfio_device_try_get_registration);
182
183 /*
184 * VFIO driver API
185 */
/* Release helper called by vfio_put_device() */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/* Optional driver hook to free its private data */
	if (device->ops->release)
		device->ops->release(device);

	/* Balances vfio_fs_inode_new() done in vfio_init_device() */
	iput(device->inode);
	simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
	kvfree(device);
}
202
203 static int vfio_init_device(struct vfio_device *device, struct device *dev,
204 const struct vfio_device_ops *ops);
205
206 /*
207 * Allocate and initialize vfio_device so it can be registered to vfio
208 * core.
209 *
210 * Drivers should use the wrapper vfio_alloc_device() for allocation.
211 * @size is the size of the structure to be allocated, including any
212 * private data used by the driver.
213 *
214 * Driver may provide an @init callback to cover device private data.
215 *
216 * Use vfio_put_device() to release the structure after success return.
217 */
/*
 * Allocate and initialize a vfio_device of @size bytes (the driver's
 * structure embedding struct vfio_device).  On success the caller owns
 * the result and releases it with vfio_put_device().
 *
 * Returns the new device or an ERR_PTR().
 */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	/* The driver structure must at least embed struct vfio_device */
	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret) {
		kvfree(device);
		return ERR_PTR(ret);
	}

	return device;
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);
241
vfio_fs_init_fs_context(struct fs_context * fc)242 static int vfio_fs_init_fs_context(struct fs_context *fc)
243 {
244 return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
245 }
246
/* Internal pseudo filesystem providing anonymous inodes for vfio devices */
static struct file_system_type vfio_fs_type = {
	.name = "vfio",
	.owner = THIS_MODULE,
	.init_fs_context = vfio_fs_init_fs_context,
	.kill_sb = kill_anon_super,
};
253
vfio_fs_inode_new(void)254 static struct inode *vfio_fs_inode_new(void)
255 {
256 struct inode *inode;
257 int ret;
258
259 ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
260 if (ret)
261 return ERR_PTR(ret);
262
263 inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
264 if (IS_ERR(inode))
265 simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
266
267 return inode;
268 }
269
/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Allocates the device index, pins the pseudo fs for an anonymous inode,
 * runs the driver's optional @ops->init() and sets up the embedded
 * struct device.  On failure everything is unwound here; after success,
 * teardown happens through vfio_device_release().
 */
static int vfio_init_device(struct vfio_device *device, struct device *dev,
			    const struct vfio_device_ops *ops)
{
	int ret;

	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
	device->inode = vfio_fs_inode_new();
	if (IS_ERR(device->inode)) {
		ret = PTR_ERR(device->inode);
		goto out_inode;
	}

	/* Optional driver hook to initialize its private data */
	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	iput(device->inode);
	simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
out_inode:
	/* dev_set may already have been assigned by the caller; safe if not */
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
314
__vfio_register_dev(struct vfio_device * device,enum vfio_group_type type)315 static int __vfio_register_dev(struct vfio_device *device,
316 enum vfio_group_type type)
317 {
318 int ret;
319
320 if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) &&
321 (!device->ops->bind_iommufd ||
322 !device->ops->unbind_iommufd ||
323 !device->ops->attach_ioas ||
324 !device->ops->detach_ioas)))
325 return -EINVAL;
326
327 /*
328 * If the driver doesn't specify a set then the device is added to a
329 * singleton set just for itself.
330 */
331 if (!device->dev_set)
332 vfio_assign_device_set(device, device);
333
334 ret = dev_set_name(&device->device, "vfio%d", device->index);
335 if (ret)
336 return ret;
337
338 ret = vfio_device_set_group(device, type);
339 if (ret)
340 return ret;
341
342 /*
343 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
344 * restore cache coherency. It has to be checked here because it is only
345 * valid for cases where we are using iommu groups.
346 */
347 if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
348 !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
349 ret = -EINVAL;
350 goto err_out;
351 }
352
353 ret = vfio_device_add(device);
354 if (ret)
355 goto err_out;
356
357 /* Refcounting can't start until the driver calls register */
358 refcount_set(&device->refcount, 1);
359
360 vfio_device_group_register(device);
361 vfio_device_debugfs_init(device);
362
363 return 0;
364 err_out:
365 vfio_device_remove_group(device);
366 return ret;
367 }
368
/*
 * Register a device backed by a real IOMMU group.  Cache coherency of
 * the IOMMU is validated in __vfio_register_dev().
 */
int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
374
/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
384
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device keep it alive; this
 * function blocks until all of them have been released (the completion
 * is signalled by the last vfio_device_put_registration()).
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	/*
	 * Prevent new device opened by userspace via the
	 * VFIO_GROUP_GET_DEVICE_FD in the group path.
	 */
	vfio_device_group_unregister(device);

	/*
	 * Balances vfio_device_add() in register path, also prevents
	 * new device opened by userspace in the cdev path.
	 */
	vfio_device_del(device);

	/* Drop the initial reference set in __vfio_register_dev() */
	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* Nudge the driver every 10s so it can kick its users */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				/*
				 * A signal arrived; warn once, then keep
				 * waiting non-interruptibly.
				 */
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	vfio_device_debugfs_exit(device);
	/* Balances vfio_device_set_group in register path */
	vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
434
435 #if IS_ENABLED(CONFIG_KVM)
/*
 * Take a reference on @kvm for @device.  The kvm symbols are resolved
 * via symbol_get() so vfio has no hard module dependency on kvm; the
 * kvm_put_kvm symbol reference is held until vfio_device_put_kvm().
 * Caller must hold dev_set->lock.
 */
void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
{
	void (*pfn)(struct kvm *kvm);
	bool (*fn)(struct kvm *kvm);
	bool ret;

	lockdep_assert_held(&device->dev_set->lock);

	if (!kvm)
		return;

	/* Resolve the put side first so it is guaranteed available later */
	pfn = symbol_get(kvm_put_kvm);
	if (WARN_ON(!pfn))
		return;

	fn = symbol_get(kvm_get_kvm_safe);
	if (WARN_ON(!fn)) {
		symbol_put(kvm_put_kvm);
		return;
	}

	/* kvm_get_kvm_safe() may fail; drop the put symbol ref if so */
	ret = fn(kvm);
	symbol_put(kvm_get_kvm_safe);
	if (!ret) {
		symbol_put(kvm_put_kvm);
		return;
	}

	device->put_kvm = pfn;
	device->kvm = kvm;
}
467
/*
 * Release the kvm reference taken by vfio_device_get_kvm_safe() and the
 * kvm_put_kvm symbol reference held with it.  Caller must hold
 * dev_set->lock.
 */
void vfio_device_put_kvm(struct vfio_device *device)
{
	lockdep_assert_held(&device->dev_set->lock);

	if (!device->kvm)
		return;

	/* put_kvm should have been cached alongside kvm; just clear if not */
	if (WARN_ON(!device->put_kvm))
		goto clear;

	device->put_kvm(device->kvm);
	device->put_kvm = NULL;
	symbol_put(kvm_put_kvm);

clear:
	device->kvm = NULL;
}
485 #endif
486
487 /* true if the vfio_device has open_device() called but not close_device() */
vfio_assert_device_open(struct vfio_device * device)488 static bool vfio_assert_device_open(struct vfio_device *device)
489 {
490 return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
491 }
492
493 struct vfio_device_file *
vfio_allocate_device_file(struct vfio_device * device)494 vfio_allocate_device_file(struct vfio_device *device)
495 {
496 struct vfio_device_file *df;
497
498 df = kzalloc_obj(*df, GFP_KERNEL_ACCOUNT);
499 if (!df)
500 return ERR_PTR(-ENOMEM);
501
502 df->device = device;
503 spin_lock_init(&df->kvm_ref_lock);
504
505 return df;
506 }
507
/*
 * First open of @df's device: pin the driver module, attach the device
 * to its IOMMU backing (iommufd bind when an iommufd ctx is set on the
 * file, otherwise the legacy group path) and call the driver's optional
 * open_device().  Undone by vfio_df_device_last_close().  Caller must
 * hold dev_set->lock.
 */
static int vfio_df_device_first_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	/* Keep the driver module loaded while the device is open */
	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	if (iommufd)
		ret = vfio_df_iommufd_bind(df);
	else
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;

	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_unuse_iommu;
	}
	return 0;

err_unuse_iommu:
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
err_module_put:
	module_put(device->dev->driver->owner);
	return ret;
}
542
/*
 * Mirror of vfio_df_device_first_open(): driver close_device(), detach
 * from the IOMMU backing, then drop the driver module reference.  Order
 * matters; caller must hold dev_set->lock.
 */
static void vfio_df_device_last_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;

	lockdep_assert_held(&device->dev_set->lock);

	if (device->ops->close_device)
		device->ops->close_device(device);
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
	module_put(device->dev->driver->owner);
}
558
vfio_df_open(struct vfio_device_file * df)559 int vfio_df_open(struct vfio_device_file *df)
560 {
561 struct vfio_device *device = df->device;
562 int ret = 0;
563
564 lockdep_assert_held(&device->dev_set->lock);
565
566 /*
567 * Only the group path allows the device to be opened multiple
568 * times. The device cdev path doesn't have a secure way for it.
569 */
570 if (device->open_count != 0 && !df->group)
571 return -EINVAL;
572
573 device->open_count++;
574 if (device->open_count == 1) {
575 ret = vfio_df_device_first_open(df);
576 if (ret)
577 device->open_count--;
578 }
579
580 return ret;
581 }
582
/*
 * Drop one open of @df's device; the last close tears down the state set
 * up by vfio_df_device_first_open().  The teardown runs while open_count
 * is still 1, before the decrement.  Caller must hold dev_set->lock.
 */
void vfio_df_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;

	lockdep_assert_held(&device->dev_set->lock);

	/* Guard against unbalanced close */
	if (!vfio_assert_device_open(device))
		return;
	if (device->open_count == 1)
		vfio_df_device_last_close(df);
	device->open_count--;
}
595
596 /*
597 * Wrapper around pm_runtime_resume_and_get().
598 * Return error code on failure or 0 on success.
599 */
vfio_device_pm_runtime_get(struct vfio_device * device)600 static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
601 {
602 struct device *dev = device->dev;
603
604 if (dev->driver && dev->driver->pm) {
605 int ret;
606
607 ret = pm_runtime_resume_and_get(dev);
608 if (ret) {
609 dev_info_ratelimited(dev,
610 "vfio: runtime resume failed %d\n", ret);
611 return -EIO;
612 }
613 }
614
615 return 0;
616 }
617
618 /*
619 * Wrapper around pm_runtime_put().
620 */
vfio_device_pm_runtime_put(struct vfio_device * device)621 static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
622 {
623 struct device *dev = device->dev;
624
625 if (dev->driver && dev->driver->pm)
626 pm_runtime_put(dev);
627 }
628
629 /*
630 * VFIO Device fd
631 */
/* fops release for the device fd: close through whichever path opened it */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;

	if (df->group)
		vfio_df_group_close(df);
	else
		vfio_df_unbind_iommufd(df);

	/*
	 * Drop the registration reference held by this fd (taken in the
	 * group/cdev open paths outside this file's view).
	 */
	vfio_device_put_registration(device);

	kfree(df);

	return 0;
}
648
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	/* PRE_COPY_P2P is the highest-numbered state used by the tables */
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 *
	 * If precopy is supported then the driver must support these additional
	 * FSM arcs:
	 *         RUNNING -> PRE_COPY
	 *         PRE_COPY -> RUNNING
	 *         PRE_COPY -> STOP_COPY
	 * However, if precopy and P2P are supported together then the driver
	 * must support these additional arcs beyond the P2P arcs above:
	 *         PRE_COPY -> RUNNING
	 *         PRE_COPY -> PRE_COPY_P2P
	 *         PRE_COPY_P2P -> PRE_COPY
	 *         PRE_COPY_P2P -> RUNNING_P2P
	 *         PRE_COPY_P2P -> STOP_COPY
	 *         RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> PRE_COPY_P2P
	 *
	 * Without P2P and precopy the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP
	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 *
	 * The following transitions are blocked:
	 *         STOP_COPY -> PRE_COPY
	 *         STOP_COPY -> PRE_COPY_P2P
	 */
	/* [cur_fsm][new_fsm] -> immediate next state toward new_fsm */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/* migration_flags the device must advertise for each state to be valid */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
						   VFIO_MIGRATION_P2P |
						   VFIO_MIGRATION_PRE_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
858
/*
 * Convert the driver's struct file into a FD number and return it to
 * userspace.  On any error this function consumes the filp reference;
 * on success ownership transfers to the installed fd.
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	/* Copy the fd out before publishing it; after fd_install() the
	 * file is visible to userspace and can no longer be unwound.
	 */
	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}
888
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE handler: GET reports the current
 * migration state, SET transitions the device and may hand back a data
 * fd (via vfio_ioct_mig_return_fd()) for the migration data stream.
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	/* NULL filp: transition succeeded but provides no data stream */
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
938
/*
 * VFIO_DEVICE_FEATURE_MIG_DATA_SIZE handler (GET only): report the
 * driver's estimated stop-copy data length to userspace.
 */
static int
vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
					      u32 flags, void __user *arg,
					      size_t argsz)
{
	struct vfio_device_feature_mig_data_size data_size = {};
	unsigned long stop_copy_length;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	/* vfio_check_feature() returns 1 when the op should proceed */
	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(data_size));
	if (ret != 1)
		return ret;

	ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
	if (ret)
		return ret;

	data_size.stop_copy_length = stop_copy_length;
	if (copy_to_user(arg, &data_size, sizeof(data_size)))
		return -EFAULT;

	return 0;
}
966
/*
 * VFIO_DEVICE_FEATURE_MIGRATION probe (GET only): report the device's
 * supported migration flags to userspace.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	/* vfio_check_feature() returns 1 when the op should proceed */
	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
987
/*
 * Reduce the interval tree @root from @cur_nodes ranges down to at most
 * @req_nodes by repeatedly merging the two adjacent ranges separated by
 * the smallest gap, so the merged set covers the least extra IOVA space.
 */
void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
			      u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap, curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);

		/* Empty list */
		if (WARN_ON_ONCE(!comb_start))
			return;

		/* Collapse everything into the first node, removing the rest */
		curr = comb_start;
		while (curr) {
			last = curr->last;
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}

		/* Empty list or no nodes to combine */
		if (WARN_ON_ONCE(min_gap == ULONG_MAX))
			break;

		/* Extend comb_start across the gap and drop comb_end */
		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}
EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);
1044
/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))

/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START (SET only): begin DMA dirty-page
 * tracking over a user-supplied set of IOVA ranges.
 *
 * Each range must be page_size aligned, must not overflow past ULONG_MAX,
 * and must not overlap any other range.  The validated ranges are placed
 * in an interval tree and handed to the driver's log_start op; the
 * (possibly updated) control struct is then copied back to userspace.  If
 * that copy-out fails, logging is stopped again so no tracking is leaked.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bound the allocation below to at most one page of ranges */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	/* NOTE(review): no GFP flags visible here — confirm kmalloc_objs()
	 * defaults to GFP_KERNEL in this tree. */
	nodes = kmalloc_objs(struct interval_tree_node, nnodes);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* The end of the range must still fit in an unsigned long */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* Report back the page_size the driver actually selected */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}
1132
1133 static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)1134 vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
1135 u32 flags, void __user *arg,
1136 size_t argsz)
1137 {
1138 int ret;
1139
1140 if (!device->log_ops)
1141 return -ENOTTY;
1142
1143 ret = vfio_check_feature(flags, argsz,
1144 VFIO_DEVICE_FEATURE_SET, 0);
1145 if (ret != 1)
1146 return ret;
1147
1148 return device->log_ops->log_stop(device);
1149 }
1150
/*
 * iova_bitmap_for_each() callback: ask the driver to fill (and clear) the
 * dirty bits for one chunk of the bitmap.  @opaque is the vfio_device.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}
1159
/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT (GET only): copy the dirty-page
 * bitmap for a tracked IOVA range into the user-supplied bitmap, clearing
 * the driver-side dirty state as it goes.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	/* Reporting granularity must be a power of two, at least 4K */
	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	/* The end of the range must still fit in an unsigned long */
	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	/* Iterator that maps chunks of the userspace bitmap for writing */
	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}
1204
/*
 * VFIO_DEVICE_FEATURE ioctl: validate the common feature header and
 * dispatch to the core handler for known features, or to the driver's
 * device_feature op for anything else.  Each handler receives only the
 * pointer to, and size of, the feature payload that follows the header.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		/* Unknown features fall through to the driver, if any */
		if (unlikely(!device->ops->device_feature))
			return -ENOTTY;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
1262
/*
 * VFIO_DEVICE_GET_REGION_INFO: fill in the caller's vfio_region_info via
 * the driver's get_region_info_caps op.  When the driver produced a
 * capability chain, it is copied to the space immediately following the
 * fixed-size header if the user buffer is large enough; otherwise only
 * the required argsz is reported back (cap_offset = 0).
 *
 * Fixes vs. previous revision: drop the redundant "goto out_free"
 * directly preceding the out_free label and the missing space before the
 * brace in the final copy_to_user() check (checkpatch style).
 */
static long vfio_get_region_info(struct vfio_device *device,
				 struct vfio_region_info __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_region_info, offset);
	struct vfio_region_info info = {};
	struct vfio_info_cap caps = {};
	int ret;

	if (unlikely(!device->ops->get_region_info_caps))
		return -EINVAL;

	if (copy_from_user(&info, arg, minsz))
		return -EFAULT;
	if (info.argsz < minsz)
		return -EINVAL;

	ret = device->ops->get_region_info_caps(device, &info, &caps);
	if (ret)
		goto out_free;

	if (caps.size) {
		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
		if (info.argsz < sizeof(info) + caps.size) {
			/* Too small: report the needed size, no chain */
			info.argsz = sizeof(info) + caps.size;
			info.cap_offset = 0;
		} else {
			/* Rebase chain offsets past the fixed header */
			vfio_info_cap_shift(&caps, sizeof(info));
			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
				ret = -EFAULT;
				goto out_free;
			}
			info.cap_offset = sizeof(info);
		}
	}

	if (copy_to_user(arg, &info, minsz))
		ret = -EFAULT;

out_free:
	kfree(caps.buf);
	return ret;
}
1307
/*
 * Main ioctl dispatcher for vfio device files.
 *
 * VFIO_DEVICE_BIND_IOMMUFD is the only ioctl permitted before full access
 * is granted; every other command requires df->access_granted, which is
 * published with smp_store_release() after a successful vfio_df_open()
 * (hence the acquire load here).  The device is runtime-resumed for the
 * duration of the operation.
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	void __user *uptr = (void __user *)arg;
	int ret;

	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
		return vfio_df_ioctl_bind_iommufd(df, uptr);

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	/* Keep the device powered while servicing the ioctl */
	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	/* cdev only ioctls */
	if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
		switch (cmd) {
		case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_attach_pt(df, uptr);
			goto out;

		case VFIO_DEVICE_DETACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_detach_pt(df, uptr);
			goto out;
		}
	}

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, uptr);
		break;

	case VFIO_DEVICE_GET_REGION_INFO:
		ret = vfio_get_region_info(device, uptr);
		break;

	default:
		/* Everything else is driver-specific */
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}
out:
	vfio_device_pm_runtime_put(device);
	return ret;
}
1360
vfio_device_fops_read(struct file * filep,char __user * buf,size_t count,loff_t * ppos)1361 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1362 size_t count, loff_t *ppos)
1363 {
1364 struct vfio_device_file *df = filep->private_data;
1365 struct vfio_device *device = df->device;
1366
1367 /* Paired with smp_store_release() following vfio_df_open() */
1368 if (!smp_load_acquire(&df->access_granted))
1369 return -EINVAL;
1370
1371 if (unlikely(!device->ops->read))
1372 return -EINVAL;
1373
1374 return device->ops->read(device, buf, count, ppos);
1375 }
1376
vfio_device_fops_write(struct file * filep,const char __user * buf,size_t count,loff_t * ppos)1377 static ssize_t vfio_device_fops_write(struct file *filep,
1378 const char __user *buf,
1379 size_t count, loff_t *ppos)
1380 {
1381 struct vfio_device_file *df = filep->private_data;
1382 struct vfio_device *device = df->device;
1383
1384 /* Paired with smp_store_release() following vfio_df_open() */
1385 if (!smp_load_acquire(&df->access_granted))
1386 return -EINVAL;
1387
1388 if (unlikely(!device->ops->write))
1389 return -EINVAL;
1390
1391 return device->ops->write(device, buf, count, ppos);
1392 }
1393
vfio_device_fops_mmap(struct file * filep,struct vm_area_struct * vma)1394 static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1395 {
1396 struct vfio_device_file *df = filep->private_data;
1397 struct vfio_device *device = df->device;
1398
1399 /* Paired with smp_store_release() following vfio_df_open() */
1400 if (!smp_load_acquire(&df->access_granted))
1401 return -EINVAL;
1402
1403 if (unlikely(!device->ops->mmap))
1404 return -EINVAL;
1405
1406 return device->ops->mmap(device, vma);
1407 }
1408
#ifdef CONFIG_PROC_FS
/*
 * Emit the device's sysfs path into /proc/<pid>/fdinfo/<fd> so userspace
 * tools can map an open vfio fd back to the underlying device.
 */
static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	char *path = kobject_get_path(&device->dev->kobj, GFP_KERNEL);

	/* Silently emit nothing if the path can't be allocated */
	if (path) {
		seq_printf(m, "vfio-device-syspath: /sys%s\n", path);
		kfree(path);
	}
}
#endif
1424
/* File operations shared by group-path and cdev-path vfio device files */
const struct file_operations vfio_device_fops = {
	.owner = THIS_MODULE,
	.open = vfio_device_fops_cdev_open,
	.release = vfio_device_fops_release,
	.read = vfio_device_fops_read,
	.write = vfio_device_fops_write,
	.unlocked_ioctl = vfio_device_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = vfio_device_fops_mmap,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = vfio_device_show_fdinfo,
#endif
};
1438
vfio_device_from_file(struct file * file)1439 static struct vfio_device *vfio_device_from_file(struct file *file)
1440 {
1441 struct vfio_device_file *df = file->private_data;
1442
1443 if (file->f_op != &vfio_device_fops)
1444 return NULL;
1445 return df->device;
1446 }
1447
1448 /**
1449 * vfio_file_is_valid - True if the file is valid vfio file
1450 * @file: VFIO group file or VFIO device file
1451 */
vfio_file_is_valid(struct file * file)1452 bool vfio_file_is_valid(struct file *file)
1453 {
1454 return vfio_group_from_file(file) ||
1455 vfio_device_from_file(file);
1456 }
1457 EXPORT_SYMBOL_GPL(vfio_file_is_valid);
1458
1459 /**
1460 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
1461 * is always CPU cache coherent
1462 * @file: VFIO group file or VFIO device file
1463 *
1464 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
1465 * bit in DMA transactions. A return of false indicates that the user has
1466 * rights to access additional instructions such as wbinvd on x86.
1467 */
vfio_file_enforced_coherent(struct file * file)1468 bool vfio_file_enforced_coherent(struct file *file)
1469 {
1470 struct vfio_device *device;
1471 struct vfio_group *group;
1472
1473 group = vfio_group_from_file(file);
1474 if (group)
1475 return vfio_group_enforced_coherent(group);
1476
1477 device = vfio_device_from_file(file);
1478 if (device)
1479 return device_iommu_capable(device->dev,
1480 IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
1481
1482 return true;
1483 }
1484 EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
1485
/* Record @kvm on the device file, serialized by kvm_ref_lock. */
static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_device_file *df = file->private_data;

	/*
	 * The kvm is first recorded in the vfio_device_file, and will
	 * be propagated to vfio_device::kvm when the file is bound to
	 * iommufd successfully in the vfio device cdev path.
	 */
	spin_lock(&df->kvm_ref_lock);
	df->kvm = kvm;
	spin_unlock(&df->kvm_ref_lock);
}
1499
/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file or VFIO device file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the file.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = vfio_group_from_file(file);

	if (group)
		vfio_group_set_kvm(group, kvm);

	if (vfio_device_from_file(file))
		vfio_device_file_set_kvm(file, kvm);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
1520
1521 /*
1522 * Sub-module support
1523 */
1524 /*
1525 * Helper for managing a buffer of info chain capabilities, allocate or
1526 * reallocate a buffer with additional @size, filling in @id and @version
1527 * of the capability. A pointer to the new capability is returned.
1528 *
1529 * NB. The chain is based at the head of the buffer, so new entries are
1530 * added to the tail, vfio_info_cap_shift() should be called to fixup the
1531 * next offsets prior to copying to the user buffer.
1532 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	/* Ensure that the next capability struct will be aligned */
	size = ALIGN(size, sizeof(u64));

	/* Grow the chain buffer; on failure discard the whole chain */
	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	/* ->next holds buffer-relative offsets until vfio_info_cap_shift() */
	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1569
/*
 * Rebase the buffer-relative ->next offsets of the capability chain by
 * @offset, turning them into user-visible offsets.  The loop subtracts
 * @offset back off when following a link so indexing stays within the
 * local buffer even after the field has been updated.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	/* Capability structs should start with proper alignment */
	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
1582
/*
 * Append a fully-formed capability @cap of @size bytes to @caps: allocate
 * the chain entry, then copy the payload that follows @cap's header.
 */
int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *hdr;

	hdr = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(hdr))
		return PTR_ERR(hdr);

	/* Copy everything after the header into the new entry */
	memcpy(hdr + 1, cap + 1, size - sizeof(*hdr));
	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);
1597
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	/*
	 * Reject a short argsz, an out-of-range index, a start+count pair
	 * that would wrap u32, and any flags outside the data/action masks.
	 */
	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	/* The requested IRQ window must lie entirely within [0, num_irqs) */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	/* Per-IRQ payload element size implied by the data type */
	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		/* argsz must cover the header plus count payload elements */
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		/* Typed payload requires a data_size out-parameter */
		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
1645
1646 /*
1647 * Pin contiguous user pages and return their associated host pages for local
1648 * domain only.
1649 * @device [in] : device
1650 * @iova [in] : starting IOVA of user pages to be pinned.
1651 * @npage [in] : count of pages to be pinned. This count should not
1652 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1653 * @prot [in] : protection flags
1654 * @pages[out] : array of host pages
1655 * Return error or number of pages pinned.
1656 *
1657 * A driver may only call this function if the vfio_device was created
1658 * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
1659 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	/* group->container cannot change while a vfio device is open */
	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
		return -EINVAL;
	/* Pinning drivers must provide dma_unmap for unmap notification */
	if (!device->ops->dma_unmap)
		return -EINVAL;
	if (vfio_device_has_container(device))
		return vfio_device_container_pin_pages(device, iova,
						       npage, prot, pages);
	if (device->iommufd_access) {
		int ret;

		/* iommufd access works on unsigned long IOVAs */
		if (iova > ULONG_MAX)
			return -EINVAL;
		/*
		 * VFIO ignores the sub page offset, npages is from the start of
		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
		 * the sub page offset by doing:
		 * pages[0] + (iova % PAGE_SIZE)
		 */
		ret = iommufd_access_pin_pages(
			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
			npage * PAGE_SIZE, pages,
			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
		if (ret)
			return ret;
		return npage;
	}
	/* Neither container nor iommufd access backing: nothing to pin into */
	return -EINVAL;
}
EXPORT_SYMBOL(vfio_pin_pages);
1693
1694 /*
1695 * Unpin contiguous host pages for local domain only.
1696 * @device [in] : device
1697 * @iova [in] : starting address of user pages to be unpinned.
1698 * @npage [in] : count of pages to be unpinned. This count should not
1699 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1700 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	/* Mirrors the preconditions enforced by vfio_pin_pages() */
	if (WARN_ON(!vfio_assert_device_open(device)))
		return;
	if (WARN_ON(!device->ops->dma_unmap))
		return;

	if (vfio_device_has_container(device)) {
		vfio_device_container_unpin_pages(device, iova, npage);
		return;
	}
	if (device->iommufd_access) {
		/* iommufd access works on unsigned long IOVAs */
		if (WARN_ON(iova > ULONG_MAX))
			return;
		/* Same PAGE_SIZE alignment convention as vfio_pin_pages() */
		iommufd_access_unpin_pages(device->iommufd_access,
					   ALIGN_DOWN(iova, PAGE_SIZE),
					   npage * PAGE_SIZE);
		return;
	}
}
EXPORT_SYMBOL(vfio_unpin_pages);
1722
1723 /*
1724 * This interface allows the CPUs to perform some sort of virtual DMA on
1725 * behalf of the device.
1726 *
1727 * CPUs read/write from/into a range of IOVAs pointing to user space memory
1728 * into/from a kernel buffer.
1729 *
1730 * As the read/write of user space memory is conducted via the CPUs and is
1731 * not a real device DMA, it is not necessary to pin the user space memory.
1732 *
1733 * @device [in] : VFIO device
1734 * @iova [in] : base IOVA of a user space buffer
1735 * @data [in] : pointer to kernel buffer
1736 * @len [in] : kernel buffer length
1737 * @write : indicate read or write
1738 * Return error code on failure or 0 on success.
1739 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	/* NOTE(review): len is size_t, so "len <= 0" only rejects len == 0 */
	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	if (vfio_device_has_container(device))
		return vfio_device_container_dma_rw(device, iova,
						    data, len, write);

	if (device->iommufd_access) {
		unsigned int flags = 0;

		/* iommufd access works on unsigned long IOVAs */
		if (iova > ULONG_MAX)
			return -EINVAL;

		/* VFIO historically tries to auto-detect a kthread */
		if (!current->mm)
			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
		if (write)
			flags |= IOMMUFD_ACCESS_RW_WRITE;
		return iommufd_access_rw(device->iommufd_access, iova, data,
					 len, flags);
	}
	/* Neither container nor iommufd access backing: nothing to access */
	return -EINVAL;
}
EXPORT_SYMBOL(vfio_dma_rw);
1767
/*
 * Module/class support
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.device_ida);

	/* Legacy group/container infrastructure comes up first */
	ret = vfio_group_init();
	if (ret)
		return ret;

	ret = vfio_virqfd_init();
	if (ret)
		goto err_virqfd;

	/* /sys/class/vfio-dev/vfioX */
	vfio.device_class = class_create("vfio-dev");
	if (IS_ERR(vfio.device_class)) {
		ret = PTR_ERR(vfio.device_class);
		goto err_dev_class;
	}

	/* Character-device support for the cdev interface */
	ret = vfio_cdev_init(vfio.device_class);
	if (ret)
		goto err_alloc_dev_chrdev;

	vfio_debugfs_create_root();
	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

	/* Unwind in reverse order of initialization */
err_alloc_dev_chrdev:
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
err_dev_class:
	vfio_virqfd_exit();
err_virqfd:
	vfio_group_cleanup();
	return ret;
}
1809
/* Tear down everything vfio_init() set up, in reverse order. */
static void __exit vfio_cleanup(void)
{
	vfio_debugfs_remove_root();
	ida_destroy(&vfio.device_ida);
	vfio_cdev_cleanup();
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
	vfio_virqfd_exit();
	vfio_group_cleanup();
	xa_destroy(&vfio_device_set_xa);
}
1821
module_init(vfio_init);
module_exit(vfio_cleanup);

/* Module metadata */
MODULE_IMPORT_NS("IOMMUFD");
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
1831