// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

/* Send a request to userspace and wait until it completes or times out */
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (!vduse_validate_config(&config))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);