1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34
35 #include "iova_domain.h"
36
37 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
38 #define DRV_DESC "vDPA Device in Userspace"
39 #define DRV_LICENSE "GPL v2"
40
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48
49 #define IRQ_UNBOUND -1
50
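/*
 * Per-virtqueue state: the ring addresses and size programmed by the
 * driver, the kick eventfd used to notify the userspace daemon, and the
 * callback/affinity bookkeeping used to inject used-ring interrupts.
 */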
51 struct vduse_virtqueue {
52 u16 index;
53 u16 num_max;
54 u32 num;
55 u64 desc_addr;
56 u64 driver_addr;
57 u64 device_addr;
58 struct vdpa_vq_state state;
59 bool ready;
60 bool kicked;
61 spinlock_t kick_lock;
62 spinlock_t irq_lock;
63 struct eventfd_ctx *kickfd;
64 struct vdpa_callback cb;
65 struct work_struct inject;
66 struct work_struct kick;
67 int irq_effective_cpu;
68 struct cpumask irq_affinity;
69 struct kobject kobj;
70 };
71
72 struct vduse_dev;
73
74 struct vduse_vdpa {
75 struct vdpa_device vdpa;
76 struct vduse_dev *dev;
77 };
78
79 struct vduse_umem {
80 unsigned long iova;
81 unsigned long npages;
82 struct page **pages;
83 struct mm_struct *mm;
84 };
85
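/*
 * A VDUSE device instance: the backing vDPA device, the IOVA domain used
 * for bounce buffering, the request/response lists towards the userspace
 * daemon (protected by msg_lock) and the mirrored virtio device state
 * (features, status, config space).
 */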
86 struct vduse_dev {
87 struct vduse_vdpa *vdev;
88 struct device *dev;
89 struct vduse_virtqueue **vqs;
90 struct vduse_iova_domain *domain;
91 char *name;
92 struct mutex lock;
93 spinlock_t msg_lock;
94 u64 msg_unique;
95 u32 msg_timeout;
96 wait_queue_head_t waitq;
97 struct list_head send_list;
98 struct list_head recv_list;
99 struct vdpa_callback config_cb;
100 struct work_struct inject;
101 spinlock_t irq_lock;
102 struct rw_semaphore rwsem;
103 int minor;
104 bool broken;
105 bool connected;
106 u64 api_version;
107 u64 device_features;
108 u64 driver_features;
109 u32 device_id;
110 u32 vendor_id;
111 u32 generation;
112 u32 config_size;
113 void *config;
114 u8 status;
115 u32 vq_num;
116 u32 vq_align;
117 struct vduse_umem *umem;
118 struct mutex mem_lock;
119 unsigned int bounce_size;
120 struct mutex domain_lock;
121 };
122
123 struct vduse_dev_msg {
124 struct vduse_dev_request req;
125 struct vduse_dev_response resp;
126 struct list_head list;
127 wait_queue_head_t waitq;
128 bool completed;
129 };
130
131 struct vduse_control {
132 u64 api_version;
133 };
134
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137
138 static dev_t vduse_major;
139 static struct cdev vduse_ctrl_cdev;
140 static struct cdev vduse_cdev;
141 static struct workqueue_struct *vduse_irq_wq;
142 static struct workqueue_struct *vduse_irq_bound_wq;
143
144 static u32 allowed_device_id[] = {
145 VIRTIO_ID_BLOCK,
146 VIRTIO_ID_NET,
147 VIRTIO_ID_FS,
148 };
149
150 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
151 {
152 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
153
154 return vdev->dev;
155 }
156
157 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
158 {
159 struct vdpa_device *vdpa = dev_to_vdpa(dev);
160
161 return vdpa_to_vduse(vdpa);
162 }
163
164 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
165 uint32_t request_id)
166 {
167 struct vduse_dev_msg *msg;
168
169 list_for_each_entry(msg, head, list) {
170 if (msg->req.request_id == request_id) {
171 list_del(&msg->list);
172 return msg;
173 }
174 }
175
176 return NULL;
177 }
178
179 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
180 {
181 struct vduse_dev_msg *msg = NULL;
182
183 if (!list_empty(head)) {
184 msg = list_first_entry(head, struct vduse_dev_msg, list);
185 list_del(&msg->list);
186 }
187
188 return msg;
189 }
190
191 static void vduse_enqueue_msg(struct list_head *head,
192 struct vduse_dev_msg *msg)
193 {
194 list_add_tail(&msg->list, head);
195 }
196
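/*
 * Fail every queued and in-flight message and mark the device as broken.
 * Called with msg_lock held when a request to userspace times out.
 */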
197 static void vduse_dev_broken(struct vduse_dev *dev)
198 {
199 struct vduse_dev_msg *msg, *tmp;
200
201 if (unlikely(dev->broken))
202 return;
203
204 list_splice_init(&dev->recv_list, &dev->send_list);
205 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
206 list_del(&msg->list);
207 msg->completed = 1;
208 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
209 wake_up(&msg->waitq);
210 }
211 dev->broken = true;
212 wake_up(&dev->waitq);
213 }
214
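/*
 * Send a request to the userspace daemon and wait (killable) for the
 * reply. The wait is bounded by msg_timeout seconds when it is non-zero;
 * on timeout the device is marked broken and -EIO is returned.
 */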
215 static int vduse_dev_msg_sync(struct vduse_dev *dev,
216 struct vduse_dev_msg *msg)
217 {
218 int ret;
219
220 if (unlikely(dev->broken))
221 return -EIO;
222
223 init_waitqueue_head(&msg->waitq);
224 spin_lock(&dev->msg_lock);
225 if (unlikely(dev->broken)) {
226 spin_unlock(&dev->msg_lock);
227 return -EIO;
228 }
229 msg->req.request_id = dev->msg_unique++;
230 vduse_enqueue_msg(&dev->send_list, msg);
231 wake_up(&dev->waitq);
232 spin_unlock(&dev->msg_lock);
233 if (dev->msg_timeout)
234 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
235 (long)dev->msg_timeout * HZ);
236 else
237 ret = wait_event_killable(msg->waitq, msg->completed);
238
239 spin_lock(&dev->msg_lock);
240 if (!msg->completed) {
241 list_del(&msg->list);
242 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
243 /* Mark the device as malfunctioning when there is a timeout */
244 if (!ret)
245 vduse_dev_broken(dev);
246 }
247 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
248 spin_unlock(&dev->msg_lock);
249
250 return ret;
251 }
252
253 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
254 struct vduse_virtqueue *vq,
255 struct vdpa_vq_state_packed *packed)
256 {
257 struct vduse_dev_msg msg = { 0 };
258 int ret;
259
260 msg.req.type = VDUSE_GET_VQ_STATE;
261 msg.req.vq_state.index = vq->index;
262
263 ret = vduse_dev_msg_sync(dev, &msg);
264 if (ret)
265 return ret;
266
267 packed->last_avail_counter =
268 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
269 packed->last_avail_idx =
270 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
271 packed->last_used_counter =
272 msg.resp.vq_state.packed.last_used_counter & 0x0001;
273 packed->last_used_idx =
274 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
275
276 return 0;
277 }
278
279 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
280 struct vduse_virtqueue *vq,
281 struct vdpa_vq_state_split *split)
282 {
283 struct vduse_dev_msg msg = { 0 };
284 int ret;
285
286 msg.req.type = VDUSE_GET_VQ_STATE;
287 msg.req.vq_state.index = vq->index;
288
289 ret = vduse_dev_msg_sync(dev, &msg);
290 if (ret)
291 return ret;
292
293 split->avail_index = msg.resp.vq_state.split.avail_index;
294
295 return 0;
296 }
297
298 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
299 {
300 struct vduse_dev_msg msg = { 0 };
301
302 msg.req.type = VDUSE_SET_STATUS;
303 msg.req.s.status = status;
304
305 return vduse_dev_msg_sync(dev, &msg);
306 }
307
308 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
309 u64 start, u64 last)
310 {
311 struct vduse_dev_msg msg = { 0 };
312
313 if (last < start)
314 return -EINVAL;
315
316 msg.req.type = VDUSE_UPDATE_IOTLB;
317 msg.req.iova.start = start;
318 msg.req.iova.last = last;
319
320 return vduse_dev_msg_sync(dev, &msg);
321 }
322
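/*
 * read() on /dev/vduse/$DEVICE: hand the next pending request on
 * send_list to the userspace daemon and park it on recv_list until the
 * matching reply arrives via write().
 */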
323 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
324 {
325 struct file *file = iocb->ki_filp;
326 struct vduse_dev *dev = file->private_data;
327 struct vduse_dev_msg *msg;
328 int size = sizeof(struct vduse_dev_request);
329 ssize_t ret;
330
331 if (iov_iter_count(to) < size)
332 return -EINVAL;
333
334 spin_lock(&dev->msg_lock);
335 while (1) {
336 msg = vduse_dequeue_msg(&dev->send_list);
337 if (msg)
338 break;
339
340 ret = -EAGAIN;
341 if (file->f_flags & O_NONBLOCK)
342 goto unlock;
343
344 spin_unlock(&dev->msg_lock);
345 ret = wait_event_interruptible_exclusive(dev->waitq,
346 !list_empty(&dev->send_list));
347 if (ret)
348 return ret;
349
350 spin_lock(&dev->msg_lock);
351 }
352 spin_unlock(&dev->msg_lock);
353 ret = copy_to_iter(&msg->req, size, to);
354 spin_lock(&dev->msg_lock);
355 if (ret != size) {
356 ret = -EFAULT;
357 vduse_enqueue_msg(&dev->send_list, msg);
358 goto unlock;
359 }
360 vduse_enqueue_msg(&dev->recv_list, msg);
361 unlock:
362 spin_unlock(&dev->msg_lock);
363
364 return ret;
365 }
366
367 static bool is_mem_zero(const char *ptr, int size)
368 {
369 int i;
370
371 for (i = 0; i < size; i++) {
372 if (ptr[i])
373 return false;
374 }
375 return true;
376 }
377
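/*
 * write() on /dev/vduse/$DEVICE: match the reply against a pending
 * request on recv_list by request_id and wake up the waiter in
 * vduse_dev_msg_sync().
 */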
378 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
379 {
380 struct file *file = iocb->ki_filp;
381 struct vduse_dev *dev = file->private_data;
382 struct vduse_dev_response resp;
383 struct vduse_dev_msg *msg;
384 size_t ret;
385
386 ret = copy_from_iter(&resp, sizeof(resp), from);
387 if (ret != sizeof(resp))
388 return -EINVAL;
389
390 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
391 return -EINVAL;
392
393 spin_lock(&dev->msg_lock);
394 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
395 if (!msg) {
396 ret = -ENOENT;
397 goto unlock;
398 }
399
400 memcpy(&msg->resp, &resp, sizeof(resp));
401 msg->completed = 1;
402 wake_up(&msg->waitq);
403 unlock:
404 spin_unlock(&dev->msg_lock);
405
406 return ret;
407 }
408
409 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
410 {
411 struct vduse_dev *dev = file->private_data;
412 __poll_t mask = 0;
413
414 poll_wait(file, &dev->waitq, wait);
415
416 spin_lock(&dev->msg_lock);
417
418 if (unlikely(dev->broken))
419 mask |= EPOLLERR;
420 if (!list_empty(&dev->send_list))
421 mask |= EPOLLIN | EPOLLRDNORM;
422 if (!list_empty(&dev->recv_list))
423 mask |= EPOLLOUT | EPOLLWRNORM;
424
425 spin_unlock(&dev->msg_lock);
426
427 return mask;
428 }
429
430 static void vduse_dev_reset(struct vduse_dev *dev)
431 {
432 int i;
433 struct vduse_iova_domain *domain = dev->domain;
434
435 /* The coherent mappings are handled in vduse_dev_free_coherent() */
436 if (domain && domain->bounce_map)
437 vduse_domain_reset_bounce_map(domain);
438
439 down_write(&dev->rwsem);
440
441 dev->status = 0;
442 dev->driver_features = 0;
443 dev->generation++;
444 spin_lock(&dev->irq_lock);
445 dev->config_cb.callback = NULL;
446 dev->config_cb.private = NULL;
447 spin_unlock(&dev->irq_lock);
448 flush_work(&dev->inject);
449
450 for (i = 0; i < dev->vq_num; i++) {
451 struct vduse_virtqueue *vq = dev->vqs[i];
452
453 vq->ready = false;
454 vq->desc_addr = 0;
455 vq->driver_addr = 0;
456 vq->device_addr = 0;
457 vq->num = 0;
458 memset(&vq->state, 0, sizeof(vq->state));
459
460 spin_lock(&vq->kick_lock);
461 vq->kicked = false;
462 if (vq->kickfd)
463 eventfd_ctx_put(vq->kickfd);
464 vq->kickfd = NULL;
465 spin_unlock(&vq->kick_lock);
466
467 spin_lock(&vq->irq_lock);
468 vq->cb.callback = NULL;
469 vq->cb.private = NULL;
470 vq->cb.trigger = NULL;
471 spin_unlock(&vq->irq_lock);
472 flush_work(&vq->inject);
473 flush_work(&vq->kick);
474 }
475
476 up_write(&dev->rwsem);
477 }
478
479 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
480 u64 desc_area, u64 driver_area,
481 u64 device_area)
482 {
483 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
484 struct vduse_virtqueue *vq = dev->vqs[idx];
485
486 vq->desc_addr = desc_area;
487 vq->driver_addr = driver_area;
488 vq->device_addr = device_area;
489
490 return 0;
491 }
492
493 static void vduse_vq_kick(struct vduse_virtqueue *vq)
494 {
495 spin_lock(&vq->kick_lock);
496 if (!vq->ready)
497 goto unlock;
498
499 if (vq->kickfd)
500 eventfd_signal(vq->kickfd);
501 else
502 vq->kicked = true;
503 unlock:
504 spin_unlock(&vq->kick_lock);
505 }
506
507 static void vduse_vq_kick_work(struct work_struct *work)
508 {
509 struct vduse_virtqueue *vq = container_of(work,
510 struct vduse_virtqueue, kick);
511
512 vduse_vq_kick(vq);
513 }
514
515 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
516 {
517 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
518 struct vduse_virtqueue *vq = dev->vqs[idx];
519
520 if (!eventfd_signal_allowed()) {
521 schedule_work(&vq->kick);
522 return;
523 }
524 vduse_vq_kick(vq);
525 }
526
527 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
528 struct vdpa_callback *cb)
529 {
530 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
531 struct vduse_virtqueue *vq = dev->vqs[idx];
532
533 spin_lock(&vq->irq_lock);
534 vq->cb.callback = cb->callback;
535 vq->cb.private = cb->private;
536 vq->cb.trigger = cb->trigger;
537 spin_unlock(&vq->irq_lock);
538 }
539
540 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
541 {
542 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
543 struct vduse_virtqueue *vq = dev->vqs[idx];
544
545 vq->num = num;
546 }
547
548 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
549 {
550 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 struct vduse_virtqueue *vq = dev->vqs[idx];
552
553 if (vq->num)
554 return vq->num;
555 else
556 return vq->num_max;
557 }
558
559 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
560 u16 idx, bool ready)
561 {
562 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
563 struct vduse_virtqueue *vq = dev->vqs[idx];
564
565 vq->ready = ready;
566 }
567
568 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
569 {
570 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
571 struct vduse_virtqueue *vq = dev->vqs[idx];
572
573 return vq->ready;
574 }
575
576 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
577 const struct vdpa_vq_state *state)
578 {
579 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
580 struct vduse_virtqueue *vq = dev->vqs[idx];
581
582 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
583 vq->state.packed.last_avail_counter =
584 state->packed.last_avail_counter;
585 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
586 vq->state.packed.last_used_counter =
587 state->packed.last_used_counter;
588 vq->state.packed.last_used_idx = state->packed.last_used_idx;
589 } else
590 vq->state.split.avail_index = state->split.avail_index;
591
592 return 0;
593 }
594
595 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
596 struct vdpa_vq_state *state)
597 {
598 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
599 struct vduse_virtqueue *vq = dev->vqs[idx];
600
601 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
602 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
603
604 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
605 }
606
607 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
608 {
609 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
610
611 return dev->vq_align;
612 }
613
614 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
615 {
616 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
617
618 return dev->device_features;
619 }
620
621 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
622 {
623 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
624
625 dev->driver_features = features;
626 return 0;
627 }
628
629 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
630 {
631 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
632
633 return dev->driver_features;
634 }
635
636 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
637 struct vdpa_callback *cb)
638 {
639 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
640
641 spin_lock(&dev->irq_lock);
642 dev->config_cb.callback = cb->callback;
643 dev->config_cb.private = cb->private;
644 spin_unlock(&dev->irq_lock);
645 }
646
647 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
648 {
649 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
650 u16 num_max = 0;
651 int i;
652
653 for (i = 0; i < dev->vq_num; i++)
654 if (num_max < dev->vqs[i]->num_max)
655 num_max = dev->vqs[i]->num_max;
656
657 return num_max;
658 }
659
660 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
661 {
662 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
663
664 return dev->device_id;
665 }
666
667 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
668 {
669 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
670
671 return dev->vendor_id;
672 }
673
674 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
675 {
676 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
677
678 return dev->status;
679 }
680
681 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
682 {
683 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
684
685 if (vduse_dev_set_status(dev, status))
686 return;
687
688 dev->status = status;
689 }
690
691 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
692 {
693 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
694
695 return dev->config_size;
696 }
697
698 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
699 void *buf, unsigned int len)
700 {
701 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
702
703 /* Initialize the buffer in case of partial copy. */
704 memset(buf, 0, len);
705
706 if (offset > dev->config_size)
707 return;
708
709 if (len > dev->config_size - offset)
710 len = dev->config_size - offset;
711
712 memcpy(buf, dev->config + offset, len);
713 }
714
715 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
716 const void *buf, unsigned int len)
717 {
718 /* Now we only support read-only configuration space */
719 }
720
721 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
722 {
723 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
724 int ret = vduse_dev_set_status(dev, 0);
725
726 vduse_dev_reset(dev);
727
728 return ret;
729 }
730
731 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
732 {
733 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
734
735 return dev->generation;
736 }
737
738 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
739 const struct cpumask *cpu_mask)
740 {
741 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
742
743 if (cpu_mask)
744 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
745 else
746 cpumask_setall(&dev->vqs[idx]->irq_affinity);
747
748 return 0;
749 }
750
751 static const struct cpumask *
752 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
753 {
754 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
755
756 return &dev->vqs[idx]->irq_affinity;
757 }
758
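/*
 * Replace the device IOTLB with the driver-provided mappings and ask the
 * userspace daemon to invalidate its cached translations for the whole
 * IOVA range.
 */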
759 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
760 unsigned int asid,
761 struct vhost_iotlb *iotlb)
762 {
763 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
764 int ret;
765
766 ret = vduse_domain_set_map(dev->domain, iotlb);
767 if (ret)
768 return ret;
769
770 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
771 if (ret) {
772 vduse_domain_clear_map(dev->domain, iotlb);
773 return ret;
774 }
775
776 return 0;
777 }
778
779 static void vduse_vdpa_free(struct vdpa_device *vdpa)
780 {
781 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
782
783 dev->vdev = NULL;
784 }
785
786 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
787 .set_vq_address = vduse_vdpa_set_vq_address,
788 .kick_vq = vduse_vdpa_kick_vq,
789 .set_vq_cb = vduse_vdpa_set_vq_cb,
790 .set_vq_num = vduse_vdpa_set_vq_num,
791 .get_vq_size = vduse_vdpa_get_vq_size,
792 .set_vq_ready = vduse_vdpa_set_vq_ready,
793 .get_vq_ready = vduse_vdpa_get_vq_ready,
794 .set_vq_state = vduse_vdpa_set_vq_state,
795 .get_vq_state = vduse_vdpa_get_vq_state,
796 .get_vq_align = vduse_vdpa_get_vq_align,
797 .get_device_features = vduse_vdpa_get_device_features,
798 .set_driver_features = vduse_vdpa_set_driver_features,
799 .get_driver_features = vduse_vdpa_get_driver_features,
800 .set_config_cb = vduse_vdpa_set_config_cb,
801 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
802 .get_device_id = vduse_vdpa_get_device_id,
803 .get_vendor_id = vduse_vdpa_get_vendor_id,
804 .get_status = vduse_vdpa_get_status,
805 .set_status = vduse_vdpa_set_status,
806 .get_config_size = vduse_vdpa_get_config_size,
807 .get_config = vduse_vdpa_get_config,
808 .set_config = vduse_vdpa_set_config,
809 .get_generation = vduse_vdpa_get_generation,
810 .set_vq_affinity = vduse_vdpa_set_vq_affinity,
811 .get_vq_affinity = vduse_vdpa_get_vq_affinity,
812 .reset = vduse_vdpa_reset,
813 .set_map = vduse_vdpa_set_map,
814 .free = vduse_vdpa_free,
815 };
816
817 static void vduse_dev_sync_single_for_device(union virtio_map token,
818 dma_addr_t dma_addr, size_t size,
819 enum dma_data_direction dir)
820 {
821 struct vduse_iova_domain *domain = token.iova_domain;
822
823 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
824 }
825
826 static void vduse_dev_sync_single_for_cpu(union virtio_map token,
827 dma_addr_t dma_addr, size_t size,
828 enum dma_data_direction dir)
829 {
830 struct vduse_iova_domain *domain = token.iova_domain;
831
832 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
833 }
834
835 static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page,
836 unsigned long offset, size_t size,
837 enum dma_data_direction dir,
838 unsigned long attrs)
839 {
840 struct vduse_iova_domain *domain = token.iova_domain;
841
842 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
843 }
844
845 static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr,
846 size_t size, enum dma_data_direction dir,
847 unsigned long attrs)
848 {
849 struct vduse_iova_domain *domain = token.iova_domain;
850
851 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
852 }
853
854 static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size,
855 dma_addr_t *dma_addr, gfp_t flag)
856 {
857 struct vduse_iova_domain *domain = token.iova_domain;
858 unsigned long iova;
859 void *addr;
860
861 *dma_addr = DMA_MAPPING_ERROR;
862 addr = vduse_domain_alloc_coherent(domain, size,
863 (dma_addr_t *)&iova, flag);
864 if (!addr)
865 return NULL;
866
867 *dma_addr = (dma_addr_t)iova;
868
869 return addr;
870 }
871
872 static void vduse_dev_free_coherent(union virtio_map token, size_t size,
873 void *vaddr, dma_addr_t dma_addr,
874 unsigned long attrs)
875 {
876 struct vduse_iova_domain *domain = token.iova_domain;
877
878 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
879 }
880
881 static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr)
882 {
883 struct vduse_iova_domain *domain = token.iova_domain;
884
885 return dma_addr < domain->bounce_size;
886 }
887
888 static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
889 {
890 if (unlikely(dma_addr == DMA_MAPPING_ERROR))
891 return -ENOMEM;
892 return 0;
893 }
894
895 static size_t vduse_dev_max_mapping_size(union virtio_map token)
896 {
897 struct vduse_iova_domain *domain = token.iova_domain;
898
899 return domain->bounce_size;
900 }
901
902 static const struct virtio_map_ops vduse_map_ops = {
903 .sync_single_for_device = vduse_dev_sync_single_for_device,
904 .sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
905 .map_page = vduse_dev_map_page,
906 .unmap_page = vduse_dev_unmap_page,
907 .alloc = vduse_dev_alloc_coherent,
908 .free = vduse_dev_free_coherent,
909 .need_sync = vduse_dev_need_sync,
910 .mapping_error = vduse_dev_mapping_error,
911 .max_mapping_size = vduse_dev_max_mapping_size,
912 };
913
914 static unsigned int perm_to_file_flags(u8 perm)
915 {
916 unsigned int flags = 0;
917
918 switch (perm) {
919 case VDUSE_ACCESS_WO:
920 flags |= O_WRONLY;
921 break;
922 case VDUSE_ACCESS_RO:
923 flags |= O_RDONLY;
924 break;
925 case VDUSE_ACCESS_RW:
926 flags |= O_RDWR;
927 break;
928 default:
929 WARN(1, "invalidate vhost IOTLB permission\n");
930 break;
931 }
932
933 return flags;
934 }
935
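/*
 * Install or tear down the eventfd used to relay virtqueue kicks to the
 * userspace daemon. A kick that arrived while no kickfd was assigned is
 * replayed once the new eventfd is in place.
 */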
936 static int vduse_kickfd_setup(struct vduse_dev *dev,
937 struct vduse_vq_eventfd *eventfd)
938 {
939 struct eventfd_ctx *ctx = NULL;
940 struct vduse_virtqueue *vq;
941 u32 index;
942
943 if (eventfd->index >= dev->vq_num)
944 return -EINVAL;
945
946 index = array_index_nospec(eventfd->index, dev->vq_num);
947 vq = dev->vqs[index];
948 if (eventfd->fd >= 0) {
949 ctx = eventfd_ctx_fdget(eventfd->fd);
950 if (IS_ERR(ctx))
951 return PTR_ERR(ctx);
952 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
953 return 0;
954
955 spin_lock(&vq->kick_lock);
956 if (vq->kickfd)
957 eventfd_ctx_put(vq->kickfd);
958 vq->kickfd = ctx;
959 if (vq->ready && vq->kicked && vq->kickfd) {
960 eventfd_signal(vq->kickfd);
961 vq->kicked = false;
962 }
963 spin_unlock(&vq->kick_lock);
964
965 return 0;
966 }
967
968 static bool vduse_dev_is_ready(struct vduse_dev *dev)
969 {
970 int i;
971
972 for (i = 0; i < dev->vq_num; i++)
973 if (!dev->vqs[i]->num_max)
974 return false;
975
976 return true;
977 }
978
979 static void vduse_dev_irq_inject(struct work_struct *work)
980 {
981 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
982
983 spin_lock_bh(&dev->irq_lock);
984 if (dev->config_cb.callback)
985 dev->config_cb.callback(dev->config_cb.private);
986 spin_unlock_bh(&dev->irq_lock);
987 }
988
989 static void vduse_vq_irq_inject(struct work_struct *work)
990 {
991 struct vduse_virtqueue *vq = container_of(work,
992 struct vduse_virtqueue, inject);
993
994 spin_lock_bh(&vq->irq_lock);
995 if (vq->ready && vq->cb.callback)
996 vq->cb.callback(vq->cb.private);
997 spin_unlock_bh(&vq->irq_lock);
998 }
999
1000 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
1001 {
1002 bool signal = false;
1003
1004 if (!vq->cb.trigger)
1005 return false;
1006
1007 spin_lock_irq(&vq->irq_lock);
1008 if (vq->ready && vq->cb.trigger) {
1009 eventfd_signal(vq->cb.trigger);
1010 signal = true;
1011 }
1012 spin_unlock_irq(&vq->irq_lock);
1013
1014 return signal;
1015 }
1016
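/*
 * Queue an interrupt injection work item on the unbound workqueue or,
 * when a CPU is selected, on the per-CPU bound workqueue. Injection is
 * only allowed after the driver has set DRIVER_OK.
 */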
1017 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1018 struct work_struct *irq_work,
1019 int irq_effective_cpu)
1020 {
1021 int ret = -EINVAL;
1022
1023 down_read(&dev->rwsem);
1024 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1025 goto unlock;
1026
1027 ret = 0;
1028 if (irq_effective_cpu == IRQ_UNBOUND)
1029 queue_work(vduse_irq_wq, irq_work);
1030 else
1031 queue_work_on(irq_effective_cpu,
1032 vduse_irq_bound_wq, irq_work);
1033 unlock:
1034 up_read(&dev->rwsem);
1035
1036 return ret;
1037 }
1038
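/*
 * Unregister the userspace bounce memory: restore the kernel bounce
 * pages and unpin the user pages that backed the bounce buffer.
 */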
1039 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
1040 u64 iova, u64 size)
1041 {
1042 int ret;
1043
1044 mutex_lock(&dev->mem_lock);
1045 ret = -ENOENT;
1046 if (!dev->umem)
1047 goto unlock;
1048
1049 ret = -EINVAL;
1050 if (!dev->domain)
1051 goto unlock;
1052
1053 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1054 goto unlock;
1055
1056 vduse_domain_remove_user_bounce_pages(dev->domain);
1057 unpin_user_pages_dirty_lock(dev->umem->pages,
1058 dev->umem->npages, true);
1059 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1060 mmdrop(dev->umem->mm);
1061 vfree(dev->umem->pages);
1062 kfree(dev->umem);
1063 dev->umem = NULL;
1064 ret = 0;
1065 unlock:
1066 mutex_unlock(&dev->mem_lock);
1067 return ret;
1068 }
1069
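/*
 * Register userspace memory as the backing of the bounce buffer. The
 * region must cover the whole bounce area (iova 0, size == bounce_size),
 * be page aligned and fit within the caller's RLIMIT_MEMLOCK budget.
 */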
1070 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1071 u64 iova, u64 uaddr, u64 size)
1072 {
1073 struct page **page_list = NULL;
1074 struct vduse_umem *umem = NULL;
1075 long pinned = 0;
1076 unsigned long npages, lock_limit;
1077 int ret;
1078
1079 if (!dev->domain || !dev->domain->bounce_map ||
1080 size != dev->domain->bounce_size ||
1081 iova != 0 || uaddr & ~PAGE_MASK)
1082 return -EINVAL;
1083
1084 mutex_lock(&dev->mem_lock);
1085 ret = -EEXIST;
1086 if (dev->umem)
1087 goto unlock;
1088
1089 ret = -ENOMEM;
1090 npages = size >> PAGE_SHIFT;
1091 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1092 GFP_KERNEL_ACCOUNT);
1093 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1094 if (!page_list || !umem)
1095 goto unlock;
1096
1097 mmap_read_lock(current->mm);
1098
1099 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1100 if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1101 goto out;
1102
1103 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1104 page_list);
1105 if (pinned != npages) {
1106 ret = pinned < 0 ? pinned : -ENOMEM;
1107 goto out;
1108 }
1109
1110 ret = vduse_domain_add_user_bounce_pages(dev->domain,
1111 page_list, pinned);
1112 if (ret)
1113 goto out;
1114
1115 atomic64_add(npages, &current->mm->pinned_vm);
1116
1117 umem->pages = page_list;
1118 umem->npages = pinned;
1119 umem->iova = iova;
1120 umem->mm = current->mm;
1121 mmgrab(current->mm);
1122
1123 dev->umem = umem;
1124 out:
1125 if (ret && pinned > 0)
1126 unpin_user_pages(page_list, pinned);
1127
1128 mmap_read_unlock(current->mm);
1129 unlock:
1130 if (ret) {
1131 vfree(page_list);
1132 kfree(umem);
1133 }
1134 mutex_unlock(&dev->mem_lock);
1135 return ret;
1136 }
1137
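/*
 * Round-robin to the next online CPU in the virtqueue's irq affinity
 * mask, wrapping around to the beginning of the mask when the end is
 * reached.
 */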
1138 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1139 {
1140 int curr_cpu = vq->irq_effective_cpu;
1141
1142 while (true) {
1143 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1144 if (cpu_online(curr_cpu))
1145 break;
1146
1147 if (curr_cpu >= nr_cpu_ids)
1148 curr_cpu = IRQ_UNBOUND;
1149 }
1150
1151 vq->irq_effective_cpu = curr_cpu;
1152 }
1153
1154 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1155 unsigned long arg)
1156 {
1157 struct vduse_dev *dev = file->private_data;
1158 void __user *argp = (void __user *)arg;
1159 int ret;
1160
1161 if (unlikely(dev->broken))
1162 return -EPERM;
1163
1164 switch (cmd) {
1165 case VDUSE_IOTLB_GET_FD: {
1166 struct vduse_iotlb_entry entry;
1167 struct vhost_iotlb_map *map;
1168 struct vdpa_map_file *map_file;
1169 struct file *f = NULL;
1170
1171 ret = -EFAULT;
1172 if (copy_from_user(&entry, argp, sizeof(entry)))
1173 break;
1174
1175 ret = -EINVAL;
1176 if (entry.start > entry.last)
1177 break;
1178
1179 mutex_lock(&dev->domain_lock);
1180 if (!dev->domain) {
1181 mutex_unlock(&dev->domain_lock);
1182 break;
1183 }
1184 spin_lock(&dev->domain->iotlb_lock);
1185 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1186 entry.start, entry.last);
1187 if (map) {
1188 map_file = (struct vdpa_map_file *)map->opaque;
1189 f = get_file(map_file->file);
1190 entry.offset = map_file->offset;
1191 entry.start = map->start;
1192 entry.last = map->last;
1193 entry.perm = map->perm;
1194 }
1195 spin_unlock(&dev->domain->iotlb_lock);
1196 mutex_unlock(&dev->domain_lock);
1197 ret = -EINVAL;
1198 if (!f)
1199 break;
1200
1201 ret = -EFAULT;
1202 if (copy_to_user(argp, &entry, sizeof(entry))) {
1203 fput(f);
1204 break;
1205 }
1206 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
1207 fput(f);
1208 break;
1209 }
1210 case VDUSE_DEV_GET_FEATURES:
1211 /*
1212 * Just mirror what the driver wrote here.
1213 * The driver is expected to check FEATURES_OK later.
1214 */
1215 ret = put_user(dev->driver_features, (u64 __user *)argp);
1216 break;
1217 case VDUSE_DEV_SET_CONFIG: {
1218 struct vduse_config_data config;
1219 unsigned long size = offsetof(struct vduse_config_data,
1220 buffer);
1221
1222 ret = -EFAULT;
1223 if (copy_from_user(&config, argp, size))
1224 break;
1225
1226 ret = -EINVAL;
1227 if (config.offset > dev->config_size ||
1228 config.length == 0 ||
1229 config.length > dev->config_size - config.offset)
1230 break;
1231
1232 ret = -EFAULT;
1233 if (copy_from_user(dev->config + config.offset, argp + size,
1234 config.length))
1235 break;
1236
1237 ret = 0;
1238 break;
1239 }
1240 case VDUSE_DEV_INJECT_CONFIG_IRQ:
1241 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1242 break;
1243 case VDUSE_VQ_SETUP: {
1244 struct vduse_vq_config config;
1245 u32 index;
1246
1247 ret = -EFAULT;
1248 if (copy_from_user(&config, argp, sizeof(config)))
1249 break;
1250
1251 ret = -EINVAL;
1252 if (config.index >= dev->vq_num)
1253 break;
1254
1255 if (!is_mem_zero((const char *)config.reserved,
1256 sizeof(config.reserved)))
1257 break;
1258
1259 index = array_index_nospec(config.index, dev->vq_num);
1260 dev->vqs[index]->num_max = config.max_size;
1261 ret = 0;
1262 break;
1263 }
1264 case VDUSE_VQ_GET_INFO: {
1265 struct vduse_vq_info vq_info;
1266 struct vduse_virtqueue *vq;
1267 u32 index;
1268
1269 ret = -EFAULT;
1270 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1271 break;
1272
1273 ret = -EINVAL;
1274 if (vq_info.index >= dev->vq_num)
1275 break;
1276
1277 index = array_index_nospec(vq_info.index, dev->vq_num);
1278 vq = dev->vqs[index];
1279 vq_info.desc_addr = vq->desc_addr;
1280 vq_info.driver_addr = vq->driver_addr;
1281 vq_info.device_addr = vq->device_addr;
1282 vq_info.num = vq->num;
1283
1284 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1285 vq_info.packed.last_avail_counter =
1286 vq->state.packed.last_avail_counter;
1287 vq_info.packed.last_avail_idx =
1288 vq->state.packed.last_avail_idx;
1289 vq_info.packed.last_used_counter =
1290 vq->state.packed.last_used_counter;
1291 vq_info.packed.last_used_idx =
1292 vq->state.packed.last_used_idx;
1293 } else
1294 vq_info.split.avail_index =
1295 vq->state.split.avail_index;
1296
1297 vq_info.ready = vq->ready;
1298
1299 ret = -EFAULT;
1300 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1301 break;
1302
1303 ret = 0;
1304 break;
1305 }
1306 case VDUSE_VQ_SETUP_KICKFD: {
1307 struct vduse_vq_eventfd eventfd;
1308
1309 ret = -EFAULT;
1310 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1311 break;
1312
1313 ret = vduse_kickfd_setup(dev, &eventfd);
1314 break;
1315 }
1316 case VDUSE_VQ_INJECT_IRQ: {
1317 u32 index;
1318
1319 ret = -EFAULT;
1320 if (get_user(index, (u32 __user *)argp))
1321 break;
1322
1323 ret = -EINVAL;
1324 if (index >= dev->vq_num)
1325 break;
1326
1327 ret = 0;
1328 index = array_index_nospec(index, dev->vq_num);
1329 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1330 vduse_vq_update_effective_cpu(dev->vqs[index]);
1331 ret = vduse_dev_queue_irq_work(dev,
1332 &dev->vqs[index]->inject,
1333 dev->vqs[index]->irq_effective_cpu);
1334 }
1335 break;
1336 }
1337 case VDUSE_IOTLB_REG_UMEM: {
1338 struct vduse_iova_umem umem;
1339
1340 ret = -EFAULT;
1341 if (copy_from_user(&umem, argp, sizeof(umem)))
1342 break;
1343
1344 ret = -EINVAL;
1345 if (!is_mem_zero((const char *)umem.reserved,
1346 sizeof(umem.reserved)))
1347 break;
1348
1349 mutex_lock(&dev->domain_lock);
1350 ret = vduse_dev_reg_umem(dev, umem.iova,
1351 umem.uaddr, umem.size);
1352 mutex_unlock(&dev->domain_lock);
1353 break;
1354 }
1355 case VDUSE_IOTLB_DEREG_UMEM: {
1356 struct vduse_iova_umem umem;
1357
1358 ret = -EFAULT;
1359 if (copy_from_user(&umem, argp, sizeof(umem)))
1360 break;
1361
1362 ret = -EINVAL;
1363 if (!is_mem_zero((const char *)umem.reserved,
1364 sizeof(umem.reserved)))
1365 break;
1366 mutex_lock(&dev->domain_lock);
1367 ret = vduse_dev_dereg_umem(dev, umem.iova,
1368 umem.size);
1369 mutex_unlock(&dev->domain_lock);
1370 break;
1371 }
1372 case VDUSE_IOTLB_GET_INFO: {
1373 struct vduse_iova_info info;
1374 struct vhost_iotlb_map *map;
1375
1376 ret = -EFAULT;
1377 if (copy_from_user(&info, argp, sizeof(info)))
1378 break;
1379
1380 ret = -EINVAL;
1381 if (info.start > info.last)
1382 break;
1383
1384 if (!is_mem_zero((const char *)info.reserved,
1385 sizeof(info.reserved)))
1386 break;
1387
1388 mutex_lock(&dev->domain_lock);
1389 if (!dev->domain) {
1390 mutex_unlock(&dev->domain_lock);
1391 break;
1392 }
1393 spin_lock(&dev->domain->iotlb_lock);
1394 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1395 info.start, info.last);
1396 if (map) {
1397 info.start = map->start;
1398 info.last = map->last;
1399 info.capability = 0;
1400 if (dev->domain->bounce_map && map->start == 0 &&
1401 map->last == dev->domain->bounce_size - 1)
1402 info.capability |= VDUSE_IOVA_CAP_UMEM;
1403 }
1404 spin_unlock(&dev->domain->iotlb_lock);
1405 mutex_unlock(&dev->domain_lock);
1406 if (!map)
1407 break;
1408
1409 ret = -EFAULT;
1410 if (copy_to_user(argp, &info, sizeof(info)))
1411 break;
1412
1413 ret = 0;
1414 break;
1415 }
1416 default:
1417 ret = -ENOIOCTLCMD;
1418 break;
1419 }
1420
1421 return ret;
1422 }
1423
1424 static int vduse_dev_release(struct inode *inode, struct file *file)
1425 {
1426 struct vduse_dev *dev = file->private_data;
1427
1428 mutex_lock(&dev->domain_lock);
1429 if (dev->domain)
1430 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1431 mutex_unlock(&dev->domain_lock);
1432 spin_lock(&dev->msg_lock);
1433 /* Make sure the in-flight messages can be processed after reconnection */
1434 list_splice_init(&dev->recv_list, &dev->send_list);
1435 spin_unlock(&dev->msg_lock);
1436 dev->connected = false;
1437
1438 return 0;
1439 }
1440
1441 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1442 {
1443 struct vduse_dev *dev;
1444
1445 mutex_lock(&vduse_lock);
1446 dev = idr_find(&vduse_idr, minor);
1447 mutex_unlock(&vduse_lock);
1448
1449 return dev;
1450 }
1451
1452 static int vduse_dev_open(struct inode *inode, struct file *file)
1453 {
1454 int ret;
1455 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1456
1457 if (!dev)
1458 return -ENODEV;
1459
1460 ret = -EBUSY;
1461 mutex_lock(&dev->lock);
1462 if (dev->connected)
1463 goto unlock;
1464
1465 ret = 0;
1466 dev->connected = true;
1467 file->private_data = dev;
1468 unlock:
1469 mutex_unlock(&dev->lock);
1470
1471 return ret;
1472 }
1473
1474 static const struct file_operations vduse_dev_fops = {
1475 .owner = THIS_MODULE,
1476 .open = vduse_dev_open,
1477 .release = vduse_dev_release,
1478 .read_iter = vduse_dev_read_iter,
1479 .write_iter = vduse_dev_write_iter,
1480 .poll = vduse_dev_poll,
1481 .unlocked_ioctl = vduse_dev_ioctl,
1482 .compat_ioctl = compat_ptr_ioctl,
1483 .llseek = noop_llseek,
1484 };
1485
1486 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1487 {
1488 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1489 }
1490
1491 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1492 const char *buf, size_t count)
1493 {
1494 cpumask_var_t new_value;
1495 int ret;
1496
1497 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1498 return -ENOMEM;
1499
1500 ret = cpumask_parse(buf, new_value);
1501 if (ret)
1502 goto free_mask;
1503
1504 ret = -EINVAL;
1505 if (!cpumask_intersects(new_value, cpu_online_mask))
1506 goto free_mask;
1507
1508 cpumask_copy(&vq->irq_affinity, new_value);
1509 ret = count;
1510 free_mask:
1511 free_cpumask_var(new_value);
1512 return ret;
1513 }
1514
1515 struct vq_sysfs_entry {
1516 struct attribute attr;
1517 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1518 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1519 size_t count);
1520 };
1521
1522 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1523
1524 static struct attribute *vq_attrs[] = {
1525 &irq_cb_affinity_attr.attr,
1526 NULL,
1527 };
1528 ATTRIBUTE_GROUPS(vq);
1529
1530 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1531 char *buf)
1532 {
1533 struct vduse_virtqueue *vq = container_of(kobj,
1534 struct vduse_virtqueue, kobj);
1535 struct vq_sysfs_entry *entry = container_of(attr,
1536 struct vq_sysfs_entry, attr);
1537
1538 if (!entry->show)
1539 return -EIO;
1540
1541 return entry->show(vq, buf);
1542 }
1543
1544 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1545 const char *buf, size_t count)
1546 {
1547 struct vduse_virtqueue *vq = container_of(kobj,
1548 struct vduse_virtqueue, kobj);
1549 struct vq_sysfs_entry *entry = container_of(attr,
1550 struct vq_sysfs_entry, attr);
1551
1552 if (!entry->store)
1553 return -EIO;
1554
1555 return entry->store(vq, buf, count);
1556 }
1557
1558 static const struct sysfs_ops vq_sysfs_ops = {
1559 .show = vq_attr_show,
1560 .store = vq_attr_store,
1561 };
1562
1563 static void vq_release(struct kobject *kobj)
1564 {
1565 struct vduse_virtqueue *vq = container_of(kobj,
1566 struct vduse_virtqueue, kobj);
1567 kfree(vq);
1568 }
1569
1570 static const struct kobj_type vq_type = {
1571 .release = vq_release,
1572 .sysfs_ops = &vq_sysfs_ops,
1573 .default_groups = vq_groups,
1574 };
1575
1576 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1577 {
1578 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1579 }
1580
1581 static const struct class vduse_class = {
1582 .name = "vduse",
1583 .devnode = vduse_devnode,
1584 };
1585
1586 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1587 {
1588 int i;
1589
1590 if (!dev->vqs)
1591 return;
1592
1593 for (i = 0; i < dev->vq_num; i++)
1594 kobject_put(&dev->vqs[i]->kobj);
1595 kfree(dev->vqs);
1596 }
1597
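/*
 * Allocate the per-virtqueue structures and expose each one as a "vqN"
 * kobject under the device so its irq_cb_affinity attribute can be tuned
 * from sysfs.
 */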
1598 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1599 {
1600 int ret, i;
1601
1602 dev->vq_align = vq_align;
1603 dev->vq_num = vq_num;
1604 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1605 if (!dev->vqs)
1606 return -ENOMEM;
1607
1608 for (i = 0; i < vq_num; i++) {
1609 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1610 if (!dev->vqs[i]) {
1611 ret = -ENOMEM;
1612 goto err;
1613 }
1614
1615 dev->vqs[i]->index = i;
1616 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1617 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1618 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1619 spin_lock_init(&dev->vqs[i]->kick_lock);
1620 spin_lock_init(&dev->vqs[i]->irq_lock);
1621 cpumask_setall(&dev->vqs[i]->irq_affinity);
1622
1623 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1624 ret = kobject_add(&dev->vqs[i]->kobj,
1625 &dev->dev->kobj, "vq%d", i);
1626 if (ret) {
1627 kfree(dev->vqs[i]);
1628 goto err;
1629 }
1630 }
1631
1632 return 0;
1633 err:
1634 while (i--)
1635 kobject_put(&dev->vqs[i]->kobj);
1636 kfree(dev->vqs);
1637 dev->vqs = NULL;
1638 return ret;
1639 }
1640
1641 static struct vduse_dev *vduse_dev_create(void)
1642 {
1643 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1644
1645 if (!dev)
1646 return NULL;
1647
1648 mutex_init(&dev->lock);
1649 mutex_init(&dev->mem_lock);
1650 mutex_init(&dev->domain_lock);
1651 spin_lock_init(&dev->msg_lock);
1652 INIT_LIST_HEAD(&dev->send_list);
1653 INIT_LIST_HEAD(&dev->recv_list);
1654 spin_lock_init(&dev->irq_lock);
1655 init_rwsem(&dev->rwsem);
1656
1657 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1658 init_waitqueue_head(&dev->waitq);
1659
1660 return dev;
1661 }
1662
1663 static void vduse_dev_destroy(struct vduse_dev *dev)
1664 {
1665 kfree(dev);
1666 }
1667
1668 static struct vduse_dev *vduse_find_dev(const char *name)
1669 {
1670 struct vduse_dev *dev;
1671 int id;
1672
1673 idr_for_each_entry(&vduse_idr, dev, id)
1674 if (!strcmp(dev->name, name))
1675 return dev;
1676
1677 return NULL;
1678 }
1679
1680 static int vduse_destroy_dev(char *name)
1681 {
1682 struct vduse_dev *dev = vduse_find_dev(name);
1683
1684 if (!dev)
1685 return -EINVAL;
1686
1687 mutex_lock(&dev->lock);
1688 if (dev->vdev || dev->connected) {
1689 mutex_unlock(&dev->lock);
1690 return -EBUSY;
1691 }
1692 dev->connected = true;
1693 mutex_unlock(&dev->lock);
1694
1695 vduse_dev_reset(dev);
1696 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1697 idr_remove(&vduse_idr, dev->minor);
1698 kvfree(dev->config);
1699 vduse_dev_deinit_vqs(dev);
1700 if (dev->domain)
1701 vduse_domain_destroy(dev->domain);
1702 kfree(dev->name);
1703 vduse_dev_destroy(dev);
1704 module_put(THIS_MODULE);
1705
1706 return 0;
1707 }
1708
1709 static bool device_is_allowed(u32 device_id)
1710 {
1711 int i;
1712
1713 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1714 if (allowed_device_id[i] == device_id)
1715 return true;
1716
1717 return false;
1718 }
1719
1720 static bool features_is_valid(struct vduse_dev_config *config)
1721 {
1722 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1723 return false;
1724
1725 /* Now we only support read-only configuration space */
1726 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1727 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1728 return false;
1729 else if ((config->device_id == VIRTIO_ID_NET) &&
1730 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1731 return false;
1732
1733 if ((config->device_id == VIRTIO_ID_NET) &&
1734 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1735 return false;
1736
1737 return true;
1738 }
1739
1740 static bool vduse_validate_config(struct vduse_dev_config *config)
1741 {
1742 if (!is_mem_zero((const char *)config->reserved,
1743 sizeof(config->reserved)))
1744 return false;
1745
1746 if (config->vq_align > PAGE_SIZE)
1747 return false;
1748
1749 if (config->config_size > PAGE_SIZE)
1750 return false;
1751
1752 if (config->vq_num > 0xffff)
1753 return false;
1754
1755 if (!config->name[0])
1756 return false;
1757
1758 if (!device_is_allowed(config->device_id))
1759 return false;
1760
1761 if (!features_is_valid(config))
1762 return false;
1763
1764 return true;
1765 }
1766
1767 static ssize_t msg_timeout_show(struct device *device,
1768 struct device_attribute *attr, char *buf)
1769 {
1770 struct vduse_dev *dev = dev_get_drvdata(device);
1771
1772 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1773 }
1774
1775 static ssize_t msg_timeout_store(struct device *device,
1776 struct device_attribute *attr,
1777 const char *buf, size_t count)
1778 {
1779 struct vduse_dev *dev = dev_get_drvdata(device);
1780 int ret;
1781
1782 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1783 if (ret < 0)
1784 return ret;
1785
1786 return count;
1787 }
1788
1789 static DEVICE_ATTR_RW(msg_timeout);
1790
1791 static ssize_t bounce_size_show(struct device *device,
1792 struct device_attribute *attr, char *buf)
1793 {
1794 struct vduse_dev *dev = dev_get_drvdata(device);
1795
1796 return sysfs_emit(buf, "%u\n", dev->bounce_size);
1797 }
1798
1799 static ssize_t bounce_size_store(struct device *device,
1800 struct device_attribute *attr,
1801 const char *buf, size_t count)
1802 {
1803 struct vduse_dev *dev = dev_get_drvdata(device);
1804 unsigned int bounce_size;
1805 int ret;
1806
1807 ret = -EPERM;
1808 mutex_lock(&dev->domain_lock);
1809 if (dev->domain)
1810 goto unlock;
1811
1812 ret = kstrtouint(buf, 10, &bounce_size);
1813 if (ret < 0)
1814 goto unlock;
1815
1816 ret = -EINVAL;
1817 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1818 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1819 goto unlock;
1820
1821 dev->bounce_size = bounce_size & PAGE_MASK;
1822 ret = count;
1823 unlock:
1824 mutex_unlock(&dev->domain_lock);
1825 return ret;
1826 }
1827
1828 static DEVICE_ATTR_RW(bounce_size);
1829
1830 static struct attribute *vduse_dev_attrs[] = {
1831 &dev_attr_msg_timeout.attr,
1832 &dev_attr_bounce_size.attr,
1833 NULL
1834 };
1835
1836 ATTRIBUTE_GROUPS(vduse_dev);
1837
1838 static int vduse_create_dev(struct vduse_dev_config *config,
1839 void *config_buf, u64 api_version)
1840 {
1841 int ret;
1842 struct vduse_dev *dev;
1843
1844 ret = -EPERM;
1845 if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
1846 goto err;
1847
1848 ret = -EEXIST;
1849 if (vduse_find_dev(config->name))
1850 goto err;
1851
1852 ret = -ENOMEM;
1853 dev = vduse_dev_create();
1854 if (!dev)
1855 goto err;
1856
1857 dev->api_version = api_version;
1858 dev->device_features = config->features;
1859 dev->device_id = config->device_id;
1860 dev->vendor_id = config->vendor_id;
1861 dev->name = kstrdup(config->name, GFP_KERNEL);
1862 if (!dev->name)
1863 goto err_str;
1864
1865 dev->bounce_size = VDUSE_BOUNCE_SIZE;
1866 dev->config = config_buf;
1867 dev->config_size = config->config_size;
1868
1869 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1870 if (ret < 0)
1871 goto err_idr;
1872
1873 dev->minor = ret;
1874 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1875 dev->dev = device_create_with_groups(&vduse_class, NULL,
1876 MKDEV(MAJOR(vduse_major), dev->minor),
1877 dev, vduse_dev_groups, "%s", config->name);
1878 if (IS_ERR(dev->dev)) {
1879 ret = PTR_ERR(dev->dev);
1880 goto err_dev;
1881 }
1882
1883 ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1884 if (ret)
1885 goto err_vqs;
1886
1887 __module_get(THIS_MODULE);
1888
1889 return 0;
1890 err_vqs:
1891 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1892 err_dev:
1893 idr_remove(&vduse_idr, dev->minor);
1894 err_idr:
1895 kfree(dev->name);
1896 err_str:
1897 vduse_dev_destroy(dev);
1898 err:
1899 return ret;
1900 }
1901
1902 static long vduse_ioctl(struct file *file, unsigned int cmd,
1903 unsigned long arg)
1904 {
1905 int ret;
1906 void __user *argp = (void __user *)arg;
1907 struct vduse_control *control = file->private_data;
1908
1909 mutex_lock(&vduse_lock);
1910 switch (cmd) {
1911 case VDUSE_GET_API_VERSION:
1912 ret = put_user(control->api_version, (u64 __user *)argp);
1913 break;
1914 case VDUSE_SET_API_VERSION: {
1915 u64 api_version;
1916
1917 ret = -EFAULT;
1918 if (get_user(api_version, (u64 __user *)argp))
1919 break;
1920
1921 ret = -EINVAL;
1922 if (api_version > VDUSE_API_VERSION)
1923 break;
1924
1925 ret = 0;
1926 control->api_version = api_version;
1927 break;
1928 }
1929 case VDUSE_CREATE_DEV: {
1930 struct vduse_dev_config config;
1931 unsigned long size = offsetof(struct vduse_dev_config, config);
1932 void *buf;
1933
1934 ret = -EFAULT;
1935 if (copy_from_user(&config, argp, size))
1936 break;
1937
1938 ret = -EINVAL;
1939 if (vduse_validate_config(&config) == false)
1940 break;
1941
1942 buf = vmemdup_user(argp + size, config.config_size);
1943 if (IS_ERR(buf)) {
1944 ret = PTR_ERR(buf);
1945 break;
1946 }
1947 config.name[VDUSE_NAME_MAX - 1] = '\0';
1948 ret = vduse_create_dev(&config, buf, control->api_version);
1949 if (ret)
1950 kvfree(buf);
1951 break;
1952 }
1953 case VDUSE_DESTROY_DEV: {
1954 char name[VDUSE_NAME_MAX];
1955
1956 ret = -EFAULT;
1957 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1958 break;
1959
1960 name[VDUSE_NAME_MAX - 1] = '\0';
1961 ret = vduse_destroy_dev(name);
1962 break;
1963 }
1964 default:
1965 ret = -EINVAL;
1966 break;
1967 }
1968 mutex_unlock(&vduse_lock);
1969
1970 return ret;
1971 }
1972
1973 static int vduse_release(struct inode *inode, struct file *file)
1974 {
1975 struct vduse_control *control = file->private_data;
1976
1977 kfree(control);
1978 return 0;
1979 }
1980
1981 static int vduse_open(struct inode *inode, struct file *file)
1982 {
1983 struct vduse_control *control;
1984
1985 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1986 if (!control)
1987 return -ENOMEM;
1988
1989 control->api_version = VDUSE_API_VERSION;
1990 file->private_data = control;
1991
1992 return 0;
1993 }
1994
1995 static const struct file_operations vduse_ctrl_fops = {
1996 .owner = THIS_MODULE,
1997 .open = vduse_open,
1998 .release = vduse_release,
1999 .unlocked_ioctl = vduse_ioctl,
2000 .compat_ioctl = compat_ptr_ioctl,
2001 .llseek = noop_llseek,
2002 };
2003
2004 struct vduse_mgmt_dev {
2005 struct vdpa_mgmt_dev mgmt_dev;
2006 struct device dev;
2007 };
2008
2009 static struct vduse_mgmt_dev *vduse_mgmt;
2010
2011 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2012 {
2013 struct vduse_vdpa *vdev;
2014
2015 if (dev->vdev)
2016 return -EEXIST;
2017
2018 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2019 &vduse_vdpa_config_ops, &vduse_map_ops,
2020 1, 1, name, true);
2021 if (IS_ERR(vdev))
2022 return PTR_ERR(vdev);
2023
2024 dev->vdev = vdev;
2025 vdev->dev = dev;
2026 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
2027
2028 return 0;
2029 }
2030
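/*
 * vdpa management "dev add" callback: bind an already created and
 * configured VDUSE device to a new vDPA device, create the IOVA domain
 * on first use and register the device on the vDPA bus.
 */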
2031 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2032 const struct vdpa_dev_set_config *config)
2033 {
2034 struct vduse_dev *dev;
2035 int ret;
2036
2037 mutex_lock(&vduse_lock);
2038 dev = vduse_find_dev(name);
2039 if (!dev || !vduse_dev_is_ready(dev)) {
2040 mutex_unlock(&vduse_lock);
2041 return -EINVAL;
2042 }
2043 ret = vduse_dev_init_vdpa(dev, name);
2044 mutex_unlock(&vduse_lock);
2045 if (ret)
2046 return ret;
2047
2048 mutex_lock(&dev->domain_lock);
2049 if (!dev->domain)
2050 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2051 dev->bounce_size);
2052 mutex_unlock(&dev->domain_lock);
2053 if (!dev->domain) {
2054 put_device(&dev->vdev->vdpa.dev);
2055 return -ENOMEM;
2056 }
2057
2058 dev->vdev->vdpa.vmap.iova_domain = dev->domain;
2059 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2060 if (ret) {
2061 put_device(&dev->vdev->vdpa.dev);
2062 mutex_lock(&dev->domain_lock);
2063 vduse_domain_destroy(dev->domain);
2064 dev->domain = NULL;
2065 mutex_unlock(&dev->domain_lock);
2066 return ret;
2067 }
2068
2069 return 0;
2070 }
2071
2072 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2073 {
2074 _vdpa_unregister_device(dev);
2075 }
2076
2077 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2078 .dev_add = vdpa_dev_add,
2079 .dev_del = vdpa_dev_del,
2080 };
2081
2082 static struct virtio_device_id id_table[] = {
2083 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2084 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2085 { 0 },
2086 };
2087
2088 static void vduse_mgmtdev_release(struct device *dev)
2089 {
2090 struct vduse_mgmt_dev *mgmt_dev;
2091
2092 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2093 kfree(mgmt_dev);
2094 }
2095
2096 static int vduse_mgmtdev_init(void)
2097 {
2098 int ret;
2099
2100 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2101 if (!vduse_mgmt)
2102 return -ENOMEM;
2103
2104 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2105 if (ret) {
2106 kfree(vduse_mgmt);
2107 return ret;
2108 }
2109
2110 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2111
2112 ret = device_register(&vduse_mgmt->dev);
2113 if (ret)
2114 goto dev_reg_err;
2115
2116 vduse_mgmt->mgmt_dev.id_table = id_table;
2117 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2118 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2119 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2120 if (ret)
2121 device_unregister(&vduse_mgmt->dev);
2122
2123 return ret;
2124
2125 dev_reg_err:
2126 put_device(&vduse_mgmt->dev);
2127 return ret;
2128 }
2129
2130 static void vduse_mgmtdev_exit(void)
2131 {
2132 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2133 device_unregister(&vduse_mgmt->dev);
2134 }
2135
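/*
 * Module init: register the "vduse" class, the /dev/vduse/control and
 * /dev/vduse/$DEVICE char devices, the interrupt injection workqueues
 * and the vDPA management device.
 */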
2136 static int vduse_init(void)
2137 {
2138 int ret;
2139 struct device *dev;
2140
2141 ret = class_register(&vduse_class);
2142 if (ret)
2143 return ret;
2144
2145 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2146 if (ret)
2147 goto err_chardev_region;
2148
2149 /* /dev/vduse/control */
2150 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2151 vduse_ctrl_cdev.owner = THIS_MODULE;
2152 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2153 if (ret)
2154 goto err_ctrl_cdev;
2155
2156 dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
2157 if (IS_ERR(dev)) {
2158 ret = PTR_ERR(dev);
2159 goto err_device;
2160 }
2161
2162 /* /dev/vduse/$DEVICE */
2163 cdev_init(&vduse_cdev, &vduse_dev_fops);
2164 vduse_cdev.owner = THIS_MODULE;
2165 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2166 VDUSE_DEV_MAX - 1);
2167 if (ret)
2168 goto err_cdev;
2169
2170 ret = -ENOMEM;
2171 vduse_irq_wq = alloc_workqueue("vduse-irq",
2172 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2173 if (!vduse_irq_wq)
2174 goto err_wq;
2175
2176 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound",
2177 WQ_HIGHPRI | WQ_PERCPU, 0);
2178 if (!vduse_irq_bound_wq)
2179 goto err_bound_wq;
2180
2181 ret = vduse_domain_init();
2182 if (ret)
2183 goto err_domain;
2184
2185 ret = vduse_mgmtdev_init();
2186 if (ret)
2187 goto err_mgmtdev;
2188
2189 return 0;
2190 err_mgmtdev:
2191 vduse_domain_exit();
2192 err_domain:
2193 destroy_workqueue(vduse_irq_bound_wq);
2194 err_bound_wq:
2195 destroy_workqueue(vduse_irq_wq);
2196 err_wq:
2197 cdev_del(&vduse_cdev);
2198 err_cdev:
2199 device_destroy(&vduse_class, vduse_major);
2200 err_device:
2201 cdev_del(&vduse_ctrl_cdev);
2202 err_ctrl_cdev:
2203 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2204 err_chardev_region:
2205 class_unregister(&vduse_class);
2206 return ret;
2207 }
2208 module_init(vduse_init);
2209
2210 static void vduse_exit(void)
2211 {
2212 vduse_mgmtdev_exit();
2213 vduse_domain_exit();
2214 destroy_workqueue(vduse_irq_bound_wq);
2215 destroy_workqueue(vduse_irq_wq);
2216 cdev_del(&vduse_cdev);
2217 device_destroy(&vduse_class, vduse_major);
2218 cdev_del(&vduse_ctrl_cdev);
2219 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2220 class_unregister(&vduse_class);
2221 idr_destroy(&vduse_idr);
2222 }
2223 module_exit(vduse_exit);
2224
2225 MODULE_LICENSE(DRV_LICENSE);
2226 MODULE_AUTHOR(DRV_AUTHOR);
2227 MODULE_DESCRIPTION(DRV_DESC);
2228