// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/virtio_net.h>
#include <linux/cleanup.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/virtio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_ring.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_DEV_MAX_GROUPS 0xffff
#define VDUSE_DEV_MAX_AS 0xffff
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1

/*
 * A VDUSE instance that has not asked for the VDUSE API version is assumed
 * to use version 0.
 *
 * Old devices may not ask for the API version at all; keep this value for
 * them. From the moment a VDUSE instance asks for the version, convert to
 * the latest supported one and continue with the regular flow.
 */
#define VDUSE_API_VERSION_NOT_ASKED U64_MAX
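
/*
 * Per-virtqueue state. Most fields mirror what the vDPA bus configures
 * through the vduse_vdpa_* callbacks (addresses, size, ready state) or what
 * userspace sets up via VDUSE_VQ_SETUP and VDUSE_VQ_SETUP_KICKFD (num_max,
 * group, kickfd). The inject/kick work items defer interrupt injection and
 * kick signalling to workqueue context.
 */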
struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	u32 group;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
	int irq_effective_cpu;
	struct cpumask irq_affinity;
	struct kobject kobj;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_as {
	struct vduse_iova_domain *domain;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_vq_group {
	rwlock_t as_lock;
	struct vduse_as *as; /* Protected by as_lock */
	struct vduse_dev *dev;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue **vqs;
	struct vduse_as *as;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	u32 ngroups;
	u32 nas;
	struct vduse_vq_group *groups;
	unsigned int bounce_size;
	struct mutex domain_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
	VIRTIO_ID_FS,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}
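
/*
 * Control-plane messages (struct vduse_dev_msg) are queued on send_list,
 * delivered to userspace through read() on the device fd, and then parked
 * on recv_list until userspace writes the matching response back.
 */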
static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev, u32 asid,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	if (dev->api_version < VDUSE_API_VERSION_1) {
		msg.req.iova.start = start;
		msg.req.iova.last = last;
	} else {
		msg.req.iova_v2.start = start;
		msg.req.iova_v2.last = last;
		msg.req.iova_v2.asid = asid;
	}

	return vduse_dev_msg_sync(dev, &msg);
}
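
/*
 * read() on the per-device fd delivers one pending vduse_dev_request to
 * userspace (blocking unless O_NONBLOCK); write() accepts the matching
 * vduse_dev_response and completes the in-flight message.
 */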
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	for (i = 0; i < dev->nas; i++) {
		struct vduse_iova_domain *domain = dev->as[i].domain;

		if (domain && domain->bounce_map)
			vduse_domain_reset_bounce_map(domain);
	}

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}
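
/*
 * vdpa_config_ops implementation: these callbacks either update cached
 * state directly or forward the operation to the userspace daemon via a
 * synchronous control message.
 */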
static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	vq->cb.trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->num = num;
}

static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (vq->num)
		return vq->num;
	else
		return vq->num_max;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}
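
/*
 * With VDUSE_API_VERSION_1, virtqueues are partitioned into groups and each
 * group can be bound to an address space (ASID) at runtime. A group's
 * current vduse_as pointer is protected by as_lock; version 0 devices have
 * a single group and a single address space.
 */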
static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (dev->api_version < VDUSE_API_VERSION_1)
		return 0;

	return dev->vqs[idx]->group;
}

static union virtio_map vduse_get_vq_map(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u32 vq_group = vduse_get_vq_group(vdpa, idx);
	union virtio_map ret = {
		.group = &dev->groups[vq_group],
	};

	return ret;
}

DEFINE_GUARD(vq_group_as_read_lock, struct vduse_vq_group *,
	     if (_T->dev->nas > 1)
		     read_lock(&_T->as_lock),
	     if (_T->dev->nas > 1)
		     read_unlock(&_T->as_lock))

DEFINE_GUARD(vq_group_as_write_lock, struct vduse_vq_group *,
	     if (_T->dev->nas > 1)
		     write_lock(&_T->as_lock),
	     if (_T->dev->nas > 1)
		     write_unlock(&_T->as_lock))

static int vduse_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
				unsigned int asid)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_dev_msg msg = { 0 };
	int r;

	if (dev->api_version < VDUSE_API_VERSION_1)
		return -EINVAL;

	msg.req.type = VDUSE_SET_VQ_GROUP_ASID;
	msg.req.vq_group_asid.group = group;
	msg.req.vq_group_asid.asid = asid;

	r = vduse_dev_msg_sync(dev, &msg);
	if (r < 0)
		return r;

	guard(vq_group_as_write_lock)(&dev->groups[group]);
	dev->groups[group].as = &dev->as[asid];

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i]->num_max)
			num_max = dev->vqs[i]->num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}
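
/*
 * Status changes are forwarded to userspace first (VDUSE_SET_STATUS); the
 * cached status is only updated when the daemon acknowledges the request.
 */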
static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (cpu_mask)
		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
	else
		cpumask_setall(&dev->vqs[idx]->irq_affinity);

	return 0;
}

static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return &dev->vqs[idx]->irq_affinity;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->as[asid].domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, asid, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->as[asid].domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.get_vq_size = vduse_vdpa_get_vq_size,
	.get_vq_group = vduse_get_vq_group,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.set_vq_affinity = vduse_vdpa_set_vq_affinity,
	.get_vq_affinity = vduse_vdpa_get_vq_affinity,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.set_group_asid = vduse_set_group_asid,
	.get_vq_map = vduse_get_vq_map,
	.free = vduse_vdpa_free,
};

static void vduse_dev_sync_single_for_device(union virtio_map token,
					     dma_addr_t dma_addr, size_t size,
					     enum dma_data_direction dir)
{
	struct vduse_iova_domain *domain;

	if (!token.group)
		return;

	guard(vq_group_as_read_lock)(token.group);
	domain = token.group->as->domain;
	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
}

static void vduse_dev_sync_single_for_cpu(union virtio_map token,
					  dma_addr_t dma_addr, size_t size,
					  enum dma_data_direction dir)
{
	struct vduse_iova_domain *domain;

	if (!token.group)
		return;

	guard(vq_group_as_read_lock)(token.group);
	domain = token.group->as->domain;
	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
}

static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_iova_domain *domain;

	if (!token.group)
		return DMA_MAPPING_ERROR;

	guard(vq_group_as_read_lock)(token.group);
	domain = token.group->as->domain;
	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_iova_domain *domain;

	if (!token.group)
		return;

	guard(vq_group_as_read_lock)(token.group);
	domain = token.group->as->domain;
	vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag)
{
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	if (!token.group)
		return NULL;

	addr = alloc_pages_exact(size, flag);
	if (!addr)
		return NULL;

	{
		struct vduse_iova_domain *domain;

		guard(vq_group_as_read_lock)(token.group);
		domain = token.group->as->domain;
		*dma_addr = vduse_domain_alloc_coherent(domain, size, addr);
		if (*dma_addr == DMA_MAPPING_ERROR)
			goto err;
	}

	return addr;

err:
	free_pages_exact(addr, size);
	return NULL;
}

static void vduse_dev_free_coherent(union virtio_map token, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	if (!token.group)
		return;

	{
		struct vduse_iova_domain *domain;

		guard(vq_group_as_read_lock)(token.group);
		domain = token.group->as->domain;
		vduse_domain_free_coherent(domain, size, dma_addr, attrs);
	}

	free_pages_exact(vaddr, size);
}

static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr)
{
	if (!token.group)
		return false;

	guard(vq_group_as_read_lock)(token.group);
	return dma_addr < token.group->as->domain->bounce_size;
}
static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
{
	if (unlikely(dma_addr == DMA_MAPPING_ERROR))
		return -ENOMEM;
	return 0;
}

static size_t vduse_dev_max_mapping_size(union virtio_map token)
{
	if (!token.group)
		return 0;

	guard(vq_group_as_read_lock)(token.group);
	return token.group->as->domain->bounce_size;
}

static const struct virtio_map_ops vduse_map_ops = {
	.sync_single_for_device = vduse_dev_sync_single_for_device,
	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.need_sync = vduse_dev_need_sync,
	.mapping_error = vduse_dev_mapping_error,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i]->num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_bh(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_bh(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_bh(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_bh(&vq->irq_lock);
}

static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
{
	bool signal = false;

	if (!vq->cb.trigger)
		return false;

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.trigger) {
		eventfd_signal(vq->cb.trigger);
		signal = true;
	}
	spin_unlock_irq(&vq->irq_lock);

	return signal;
}
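
/*
 * Interrupt injection requested by userspace runs from a workqueue: either
 * the unbound vduse_irq_wq or, when an irq_cb affinity is configured, the
 * per-CPU vduse_irq_bound_wq on the selected CPU.
 */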
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work,
				    int irq_effective_cpu)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	if (irq_effective_cpu == IRQ_UNBOUND)
		queue_work(vduse_irq_wq, irq_work);
	else
		queue_work_on(irq_effective_cpu,
			      vduse_irq_bound_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev, u32 asid,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->as[asid].mem_lock);
	ret = -ENOENT;
	if (!dev->as[asid].umem)
		goto unlock;

	ret = -EINVAL;
	if (!dev->as[asid].domain)
		goto unlock;

	if (dev->as[asid].umem->iova != iova ||
	    size != dev->as[asid].domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->as[asid].domain);
	unpin_user_pages_dirty_lock(dev->as[asid].umem->pages,
				    dev->as[asid].umem->npages, true);
	atomic64_sub(dev->as[asid].umem->npages, &dev->as[asid].umem->mm->pinned_vm);
	mmdrop(dev->as[asid].umem->mm);
	vfree(dev->as[asid].umem->pages);
	kfree(dev->as[asid].umem);
	dev->as[asid].umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->as[asid].mem_lock);
	return ret;
}
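
/*
 * VDUSE_IOTLB_REG_UMEM lets userspace supply its own pages to back the
 * bounce buffer of an address space. The pages are long-term pinned and
 * charged against RLIMIT_MEMLOCK of the registering process.
 */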
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u32 asid, u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->as[asid].domain || !dev->as[asid].domain->bounce_map ||
	    size != dev->as[asid].domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->as[asid].mem_lock);
	ret = -EEXIST;
	if (dev->as[asid].umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->as[asid].domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->as[asid].umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->as[asid].mem_lock);
	return ret;
}

static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
{
	int curr_cpu = vq->irq_effective_cpu;

	while (true) {
		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
		if (cpu_online(curr_cpu))
			break;

		if (curr_cpu >= nr_cpu_ids)
			curr_cpu = IRQ_UNBOUND;
	}

	vq->irq_effective_cpu = curr_cpu;
}

static int vduse_dev_iotlb_entry(struct vduse_dev *dev,
				 struct vduse_iotlb_entry_v2 *entry,
				 struct file **f, uint64_t *capability)
{
	u32 asid;
	int r = -EINVAL;
	struct vhost_iotlb_map *map;

	if (entry->start > entry->last || entry->asid >= dev->nas)
		return -EINVAL;

	asid = array_index_nospec(entry->asid, dev->nas);
	mutex_lock(&dev->domain_lock);

	if (!dev->as[asid].domain)
		goto out;

	spin_lock(&dev->as[asid].domain->iotlb_lock);
	map = vhost_iotlb_itree_first(dev->as[asid].domain->iotlb,
				      entry->start, entry->last);
	if (map) {
		if (f) {
			const struct vdpa_map_file *map_file;

			map_file = (struct vdpa_map_file *)map->opaque;
			entry->offset = map_file->offset;
			*f = get_file(map_file->file);
		}
		entry->start = map->start;
		entry->last = map->last;
		entry->perm = map->perm;
		if (capability) {
			*capability = 0;

			if (dev->as[asid].domain->bounce_map && map->start == 0 &&
			    map->last == dev->as[asid].domain->bounce_size - 1)
				*capability |= VDUSE_IOVA_CAP_UMEM;
		}

		r = 0;
	}
	spin_unlock(&dev->as[asid].domain->iotlb_lock);

out:
	mutex_unlock(&dev->domain_lock);
	return r;
}
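
/*
 * Per-device ioctl interface used by the userspace daemon: export IOTLB
 * regions as fds (VDUSE_IOTLB_GET_FD*), mirror negotiated features and
 * device config, configure virtqueues and their kick eventfds, inject
 * config/vq interrupts, and (de)register umem for the bounce buffer.
 */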
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD:
	case VDUSE_IOTLB_GET_FD2: {
		struct vduse_iotlb_entry_v2 entry = {0};
		struct file *f = NULL;

		ret = -ENOIOCTLCMD;
		if (dev->api_version < VDUSE_API_VERSION_1 &&
		    cmd == VDUSE_IOTLB_GET_FD2)
			break;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, _IOC_SIZE(cmd)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)entry.reserved,
				 sizeof(entry.reserved)))
			break;

		ret = vduse_dev_iotlb_entry(dev, &entry, &f, NULL);
		if (ret)
			break;

		ret = -EINVAL;
		if (!f)
			break;

		ret = copy_to_user(argp, &entry, _IOC_SIZE(cmd));
		if (ret) {
			ret = -EFAULT;
			fput(f);
			break;
		}
		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what the driver wrote here.
		 * The driver is expected to check FEATURES_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (dev->api_version < VDUSE_API_VERSION_1) {
			if (config.group)
				break;
		} else {
			if (config.group >= dev->ngroups)
				break;
			if (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)
				break;
		}

		if (config.reserved1 ||
		    !is_mem_zero((const char *)config.reserved2,
				 sizeof(config.reserved2)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		dev->vqs[index]->group = config.group;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
			vduse_vq_update_effective_cpu(dev->vqs[index]);
			ret = vduse_dev_queue_irq_work(dev,
						&dev->vqs[index]->inject,
						dev->vqs[index]->irq_effective_cpu);
		}
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;
		u32 asid;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)) ||
		    (dev->api_version < VDUSE_API_VERSION_1 &&
		     umem.asid != 0) || umem.asid >= dev->nas)
			break;

		mutex_lock(&dev->domain_lock);
		asid = array_index_nospec(umem.asid, dev->nas);
		ret = vduse_dev_reg_umem(dev, asid, umem.iova,
					 umem.uaddr, umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;
		u32 asid;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)) ||
		    (dev->api_version < VDUSE_API_VERSION_1 &&
		     umem.asid != 0) ||
		    umem.asid >= dev->nas)
			break;

		mutex_lock(&dev->domain_lock);
		asid = array_index_nospec(umem.asid, dev->nas);
		ret = vduse_dev_dereg_umem(dev, asid, umem.iova,
					   umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vduse_iotlb_entry_v2 entry;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		if (dev->api_version < VDUSE_API_VERSION_1) {
			if (info.asid)
				break;
		} else if (info.asid >= dev->nas)
			break;

		entry.start = info.start;
		entry.last = info.last;
		entry.asid = info.asid;
		ret = vduse_dev_iotlb_entry(dev, &entry, NULL,
					    &info.capability);
		if (ret < 0)
			break;

		info.start = entry.start;
		info.last = entry.last;
		info.asid = entry.asid;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	mutex_lock(&dev->domain_lock);
	for (int i = 0; i < dev->nas; i++)
		if (dev->as[i].domain)
			vduse_dev_dereg_umem(dev, i, 0,
					     dev->as[i].domain->bounce_size);
	mutex_unlock(&dev->domain_lock);
	spin_lock(&dev->msg_lock);
	/* Make sure the in-flight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}
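
/* File operations for the per-device char device (/dev/vduse/$NAME) */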
static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
{
	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
}

static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
				     const char *buf, size_t count)
{
	cpumask_var_t new_value;
	int ret;

	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parse(buf, new_value);
	if (ret)
		goto free_mask;

	ret = -EINVAL;
	if (!cpumask_intersects(new_value, cpu_online_mask))
		goto free_mask;

	cpumask_copy(&vq->irq_affinity, new_value);
	ret = count;
free_mask:
	free_cpumask_var(new_value);
	return ret;
}

struct vq_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			 size_t count);
};

static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vq);

static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->show)
		return -EIO;

	return entry->show(vq, buf);
}

static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
			     const char *buf, size_t count)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->store)
		return -EIO;

	return entry->store(vq, buf, count);
}

static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};

static void vq_release(struct kobject *kobj)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	kfree(vq);
}

static const struct kobj_type vq_type = {
	.release = vq_release,
	.sysfs_ops = &vq_sysfs_ops,
	.default_groups = vq_groups,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};

static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
	int i;

	if (!dev->vqs)
		return;

	for (i = 0; i < dev->vq_num; i++)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
}

static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i]) {
			ret = -ENOMEM;
			goto err;
		}

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);

		kobject_init(&dev->vqs[i]->kobj, &vq_type);
		ret = kobject_add(&dev->vqs[i]->kobj,
				  &dev->dev->kobj, "vq%d", i);
		if (ret) {
			kfree(dev->vqs[i]);
			goto err;
		}
	}

	return 0;
err:
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->domain_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	for (int i = 0; i < dev->nas; i++) {
		if (dev->as[i].domain)
			vduse_domain_destroy(dev->as[i].domain);
	}
	kfree(dev->as);
	kfree(dev->name);
	kfree(dev->groups);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(struct vduse_dev_config *config)
{
	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if ((config->device_id == VIRTIO_ID_BLOCK) &&
	    (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
		return false;
	else if ((config->device_id == VIRTIO_ID_NET) &&
		 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return false;

	if ((config->device_id == VIRTIO_ID_NET) &&
	    !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
		return false;

	return true;
}
static bool vduse_validate_config(struct vduse_dev_config *config,
				  u64 api_version)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (api_version < VDUSE_API_VERSION_1 &&
	    (config->ngroups || config->nas))
		return false;

	if (api_version >= VDUSE_API_VERSION_1) {
		if (!config->ngroups || config->ngroups > VDUSE_DEV_MAX_GROUPS)
			return false;

		if (!config->nas || config->nas > VDUSE_DEV_MAX_AS)
			return false;
	}

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!config->name[0])
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static ssize_t bounce_size_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->bounce_size);
}

static ssize_t bounce_size_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	unsigned int bounce_size;
	int ret;

	ret = -EPERM;
	mutex_lock(&dev->domain_lock);
	/* Assuming that if the first domain is allocated, all are allocated */
	if (dev->as[0].domain)
		goto unlock;

	ret = kstrtouint(buf, 10, &bounce_size);
	if (ret < 0)
		goto unlock;

	ret = -EINVAL;
	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
		goto unlock;

	dev->bounce_size = bounce_size & PAGE_MASK;
	ret = count;
unlock:
	mutex_unlock(&dev->domain_lock);
	return ret;
}

static DEVICE_ATTR_RW(bounce_size);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EPERM;
	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
		goto err;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;

	dev->nas = (dev->api_version < VDUSE_API_VERSION_1) ? 1 : config->nas;
	dev->as = kcalloc(dev->nas, sizeof(dev->as[0]), GFP_KERNEL);
	if (!dev->as)
		goto err_as;
	for (int i = 0; i < dev->nas; i++)
		mutex_init(&dev->as[i].mem_lock);

	dev->ngroups = (dev->api_version < VDUSE_API_VERSION_1)
		       ? 1
		       : config->ngroups;
	dev->groups = kcalloc(dev->ngroups, sizeof(dev->groups[0]),
			      GFP_KERNEL);
	if (!dev->groups)
		goto err_vq_groups;
	for (u32 i = 0; i < dev->ngroups; ++i) {
		dev->groups[i].dev = dev;
		rwlock_init(&dev->groups[i].as_lock);
		dev->groups[i].as = &dev->as[0];
	}

	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;
err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	kfree(dev->groups);
err_vq_groups:
	kfree(dev->as);
err_as:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
			control->api_version = VDUSE_API_VERSION_1;
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION_1)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
			control->api_version = VDUSE_API_VERSION;
		if (!vduse_validate_config(&config, control->api_version))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}
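
/*
 * Typical control flow for a userspace daemon (illustrative sketch only):
 * open /dev/vduse/control, negotiate the API with VDUSE_SET_API_VERSION,
 * create a device with VDUSE_CREATE_DEV (struct vduse_dev_config followed
 * by config_size bytes of config space), then open /dev/vduse/$NAME and
 * service requests via read()/write() and the VDUSE_VQ_* ioctls.
 */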

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
			control->api_version = VDUSE_API_VERSION_1;
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION_1)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
			control->api_version = VDUSE_API_VERSION;
		if (!vduse_validate_config(&config, control->api_version))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION_NOT_ASKED;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, &vduse_map_ops,
				 dev->ngroups, dev->nas, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	size_t domain_bounce_size;
	int ret, i;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	ret = 0;

	domain_bounce_size = dev->bounce_size / dev->nas;
	for (i = 0; i < dev->nas; ++i) {
		dev->as[i].domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
							domain_bounce_size);
		if (!dev->as[i].domain) {
			ret = -ENOMEM;
			goto err;
		}
	}

	mutex_unlock(&dev->domain_lock);

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret)
		goto err_register;

	return 0;

err_register:
	mutex_lock(&dev->domain_lock);

err:
	for (int j = 0; j < i; j++) {
		if (dev->as[j].domain) {
			vduse_domain_destroy(dev->as[j].domain);
			dev->as[j].domain = NULL;
		}
	}
	mutex_unlock(&dev->domain_lock);

	put_device(&dev->vdev->vdpa.dev);

	return ret;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}
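
/*
 * The "vduse" vdpa management device. After a VDUSE device has been created
 * and made ready via /dev/vduse/control and /dev/vduse/$DEVICE, userspace
 * attaches it to the vdpa bus through the management device, which ends up
 * in vdpa_dev_add() above. A typical invocation with the iproute2 vdpa tool
 * (sketch, assuming the device was created with the name "vduse-test"):
 *
 *	# vdpa dev add name vduse-test mgmtdev vduse
 *
 * Removing the vdpa device again goes through vdpa_dev_del():
 *
 *	# vdpa dev del vduse-test
 */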

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				       WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound",
					     WQ_HIGHPRI | WQ_PERCPU, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
	idr_destroy(&vduse_idr);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);