1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * VDUSE: vDPA Device in Userspace 4 * 5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. 6 * 7 * Author: Xie Yongji <xieyongji@bytedance.com> 8 * 9 */ 10 11 #include <linux/init.h> 12 #include <linux/module.h> 13 #include <linux/cdev.h> 14 #include <linux/device.h> 15 #include <linux/eventfd.h> 16 #include <linux/slab.h> 17 #include <linux/wait.h> 18 #include <linux/dma-map-ops.h> 19 #include <linux/poll.h> 20 #include <linux/file.h> 21 #include <linux/uio.h> 22 #include <linux/vdpa.h> 23 #include <linux/nospec.h> 24 #include <uapi/linux/vduse.h> 25 #include <uapi/linux/vdpa.h> 26 #include <uapi/linux/virtio_config.h> 27 #include <uapi/linux/virtio_ids.h> 28 #include <uapi/linux/virtio_blk.h> 29 #include <linux/mod_devicetable.h> 30 31 #include "iova_domain.h" 32 33 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>" 34 #define DRV_DESC "vDPA Device in Userspace" 35 #define DRV_LICENSE "GPL v2" 36 37 #define VDUSE_DEV_MAX (1U << MINORBITS) 38 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) 39 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024) 40 #define VDUSE_MSG_DEFAULT_TIMEOUT 30 41 42 struct vduse_virtqueue { 43 u16 index; 44 u16 num_max; 45 u32 num; 46 u64 desc_addr; 47 u64 driver_addr; 48 u64 device_addr; 49 struct vdpa_vq_state state; 50 bool ready; 51 bool kicked; 52 spinlock_t kick_lock; 53 spinlock_t irq_lock; 54 struct eventfd_ctx *kickfd; 55 struct vdpa_callback cb; 56 struct work_struct inject; 57 struct work_struct kick; 58 }; 59 60 struct vduse_dev; 61 62 struct vduse_vdpa { 63 struct vdpa_device vdpa; 64 struct vduse_dev *dev; 65 }; 66 67 struct vduse_dev { 68 struct vduse_vdpa *vdev; 69 struct device *dev; 70 struct vduse_virtqueue *vqs; 71 struct vduse_iova_domain *domain; 72 char *name; 73 struct mutex lock; 74 spinlock_t msg_lock; 75 u64 msg_unique; 76 u32 msg_timeout; 77 wait_queue_head_t waitq; 78 struct list_head send_list; 79 struct list_head recv_list; 80 struct vdpa_callback config_cb; 81 struct work_struct inject; 82 spinlock_t irq_lock; 83 struct rw_semaphore rwsem; 84 int minor; 85 bool broken; 86 bool connected; 87 u64 api_version; 88 u64 device_features; 89 u64 driver_features; 90 u32 device_id; 91 u32 vendor_id; 92 u32 generation; 93 u32 config_size; 94 void *config; 95 u8 status; 96 u32 vq_num; 97 u32 vq_align; 98 }; 99 100 struct vduse_dev_msg { 101 struct vduse_dev_request req; 102 struct vduse_dev_response resp; 103 struct list_head list; 104 wait_queue_head_t waitq; 105 bool completed; 106 }; 107 108 struct vduse_control { 109 u64 api_version; 110 }; 111 112 static DEFINE_MUTEX(vduse_lock); 113 static DEFINE_IDR(vduse_idr); 114 115 static dev_t vduse_major; 116 static struct class *vduse_class; 117 static struct cdev vduse_ctrl_cdev; 118 static struct cdev vduse_cdev; 119 static struct workqueue_struct *vduse_irq_wq; 120 121 static u32 allowed_device_id[] = { 122 VIRTIO_ID_BLOCK, 123 }; 124 125 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) 126 { 127 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa); 128 129 return vdev->dev; 130 } 131 132 static inline struct vduse_dev *dev_to_vduse(struct device *dev) 133 { 134 struct vdpa_device *vdpa = dev_to_vdpa(dev); 135 136 return vdpa_to_vduse(vdpa); 137 } 138 139 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head, 140 uint32_t request_id) 141 { 142 struct vduse_dev_msg *msg; 143 144 list_for_each_entry(msg, head, list) { 145 if (msg->req.request_id == request_id) { 146 list_del(&msg->list); 147 return msg; 148 } 149 } 150 151 return NULL; 152 } 153 154 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head) 155 { 156 struct vduse_dev_msg *msg = NULL; 157 158 if (!list_empty(head)) { 159 msg = list_first_entry(head, struct vduse_dev_msg, list); 160 list_del(&msg->list); 161 } 162 163 return msg; 164 } 165 166 static void vduse_enqueue_msg(struct list_head *head, 167 struct vduse_dev_msg *msg) 168 { 169 list_add_tail(&msg->list, head); 170 } 171 172 static void vduse_dev_broken(struct vduse_dev *dev) 173 { 174 struct vduse_dev_msg *msg, *tmp; 175 176 if (unlikely(dev->broken)) 177 return; 178 179 list_splice_init(&dev->recv_list, &dev->send_list); 180 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) { 181 list_del(&msg->list); 182 msg->completed = 1; 183 msg->resp.result = VDUSE_REQ_RESULT_FAILED; 184 wake_up(&msg->waitq); 185 } 186 dev->broken = true; 187 wake_up(&dev->waitq); 188 } 189 190 static int vduse_dev_msg_sync(struct vduse_dev *dev, 191 struct vduse_dev_msg *msg) 192 { 193 int ret; 194 195 if (unlikely(dev->broken)) 196 return -EIO; 197 198 init_waitqueue_head(&msg->waitq); 199 spin_lock(&dev->msg_lock); 200 if (unlikely(dev->broken)) { 201 spin_unlock(&dev->msg_lock); 202 return -EIO; 203 } 204 msg->req.request_id = dev->msg_unique++; 205 vduse_enqueue_msg(&dev->send_list, msg); 206 wake_up(&dev->waitq); 207 spin_unlock(&dev->msg_lock); 208 if (dev->msg_timeout) 209 ret = wait_event_killable_timeout(msg->waitq, msg->completed, 210 (long)dev->msg_timeout * HZ); 211 else 212 ret = wait_event_killable(msg->waitq, msg->completed); 213 214 spin_lock(&dev->msg_lock); 215 if (!msg->completed) { 216 list_del(&msg->list); 217 msg->resp.result = VDUSE_REQ_RESULT_FAILED; 218 /* Mark the device as malfunction when there is a timeout */ 219 if (!ret) 220 vduse_dev_broken(dev); 221 } 222 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO; 223 spin_unlock(&dev->msg_lock); 224 225 return ret; 226 } 227 228 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev, 229 struct vduse_virtqueue *vq, 230 struct vdpa_vq_state_packed *packed) 231 { 232 struct vduse_dev_msg msg = { 0 }; 233 int ret; 234 235 msg.req.type = VDUSE_GET_VQ_STATE; 236 msg.req.vq_state.index = vq->index; 237 238 ret = vduse_dev_msg_sync(dev, &msg); 239 if (ret) 240 return ret; 241 242 packed->last_avail_counter = 243 msg.resp.vq_state.packed.last_avail_counter & 0x0001; 244 packed->last_avail_idx = 245 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF; 246 packed->last_used_counter = 247 msg.resp.vq_state.packed.last_used_counter & 0x0001; 248 packed->last_used_idx = 249 msg.resp.vq_state.packed.last_used_idx & 0x7FFF; 250 251 return 0; 252 } 253 254 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev, 255 struct vduse_virtqueue *vq, 256 struct vdpa_vq_state_split *split) 257 { 258 struct vduse_dev_msg msg = { 0 }; 259 int ret; 260 261 msg.req.type = VDUSE_GET_VQ_STATE; 262 msg.req.vq_state.index = vq->index; 263 264 ret = vduse_dev_msg_sync(dev, &msg); 265 if (ret) 266 return ret; 267 268 split->avail_index = msg.resp.vq_state.split.avail_index; 269 270 return 0; 271 } 272 273 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status) 274 { 275 struct vduse_dev_msg msg = { 0 }; 276 277 msg.req.type = VDUSE_SET_STATUS; 278 msg.req.s.status = status; 279 280 return vduse_dev_msg_sync(dev, &msg); 281 } 282 283 static int vduse_dev_update_iotlb(struct vduse_dev *dev, 284 u64 start, u64 last) 285 { 286 struct vduse_dev_msg msg = { 0 }; 287 288 if (last < start) 289 return -EINVAL; 290 291 msg.req.type = VDUSE_UPDATE_IOTLB; 292 msg.req.iova.start = start; 293 msg.req.iova.last = last; 294 295 return vduse_dev_msg_sync(dev, &msg); 296 } 297 298 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) 299 { 300 struct file *file = iocb->ki_filp; 301 struct vduse_dev *dev = file->private_data; 302 struct vduse_dev_msg *msg; 303 int size = sizeof(struct vduse_dev_request); 304 ssize_t ret; 305 306 if (iov_iter_count(to) < size) 307 return -EINVAL; 308 309 spin_lock(&dev->msg_lock); 310 while (1) { 311 msg = vduse_dequeue_msg(&dev->send_list); 312 if (msg) 313 break; 314 315 ret = -EAGAIN; 316 if (file->f_flags & O_NONBLOCK) 317 goto unlock; 318 319 spin_unlock(&dev->msg_lock); 320 ret = wait_event_interruptible_exclusive(dev->waitq, 321 !list_empty(&dev->send_list)); 322 if (ret) 323 return ret; 324 325 spin_lock(&dev->msg_lock); 326 } 327 spin_unlock(&dev->msg_lock); 328 ret = copy_to_iter(&msg->req, size, to); 329 spin_lock(&dev->msg_lock); 330 if (ret != size) { 331 ret = -EFAULT; 332 vduse_enqueue_msg(&dev->send_list, msg); 333 goto unlock; 334 } 335 vduse_enqueue_msg(&dev->recv_list, msg); 336 unlock: 337 spin_unlock(&dev->msg_lock); 338 339 return ret; 340 } 341 342 static bool is_mem_zero(const char *ptr, int size) 343 { 344 int i; 345 346 for (i = 0; i < size; i++) { 347 if (ptr[i]) 348 return false; 349 } 350 return true; 351 } 352 353 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from) 354 { 355 struct file *file = iocb->ki_filp; 356 struct vduse_dev *dev = file->private_data; 357 struct vduse_dev_response resp; 358 struct vduse_dev_msg *msg; 359 size_t ret; 360 361 ret = copy_from_iter(&resp, sizeof(resp), from); 362 if (ret != sizeof(resp)) 363 return -EINVAL; 364 365 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved))) 366 return -EINVAL; 367 368 spin_lock(&dev->msg_lock); 369 msg = vduse_find_msg(&dev->recv_list, resp.request_id); 370 if (!msg) { 371 ret = -ENOENT; 372 goto unlock; 373 } 374 375 memcpy(&msg->resp, &resp, sizeof(resp)); 376 msg->completed = 1; 377 wake_up(&msg->waitq); 378 unlock: 379 spin_unlock(&dev->msg_lock); 380 381 return ret; 382 } 383 384 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait) 385 { 386 struct vduse_dev *dev = file->private_data; 387 __poll_t mask = 0; 388 389 poll_wait(file, &dev->waitq, wait); 390 391 spin_lock(&dev->msg_lock); 392 393 if (unlikely(dev->broken)) 394 mask |= EPOLLERR; 395 if (!list_empty(&dev->send_list)) 396 mask |= EPOLLIN | EPOLLRDNORM; 397 if (!list_empty(&dev->recv_list)) 398 mask |= EPOLLOUT | EPOLLWRNORM; 399 400 spin_unlock(&dev->msg_lock); 401 402 return mask; 403 } 404 405 static void vduse_dev_reset(struct vduse_dev *dev) 406 { 407 int i; 408 struct vduse_iova_domain *domain = dev->domain; 409 410 /* The coherent mappings are handled in vduse_dev_free_coherent() */ 411 if (domain->bounce_map) 412 vduse_domain_reset_bounce_map(domain); 413 414 down_write(&dev->rwsem); 415 416 dev->status = 0; 417 dev->driver_features = 0; 418 dev->generation++; 419 spin_lock(&dev->irq_lock); 420 dev->config_cb.callback = NULL; 421 dev->config_cb.private = NULL; 422 spin_unlock(&dev->irq_lock); 423 flush_work(&dev->inject); 424 425 for (i = 0; i < dev->vq_num; i++) { 426 struct vduse_virtqueue *vq = &dev->vqs[i]; 427 428 vq->ready = false; 429 vq->desc_addr = 0; 430 vq->driver_addr = 0; 431 vq->device_addr = 0; 432 vq->num = 0; 433 memset(&vq->state, 0, sizeof(vq->state)); 434 435 spin_lock(&vq->kick_lock); 436 vq->kicked = false; 437 if (vq->kickfd) 438 eventfd_ctx_put(vq->kickfd); 439 vq->kickfd = NULL; 440 spin_unlock(&vq->kick_lock); 441 442 spin_lock(&vq->irq_lock); 443 vq->cb.callback = NULL; 444 vq->cb.private = NULL; 445 spin_unlock(&vq->irq_lock); 446 flush_work(&vq->inject); 447 flush_work(&vq->kick); 448 } 449 450 up_write(&dev->rwsem); 451 } 452 453 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, 454 u64 desc_area, u64 driver_area, 455 u64 device_area) 456 { 457 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 458 struct vduse_virtqueue *vq = &dev->vqs[idx]; 459 460 vq->desc_addr = desc_area; 461 vq->driver_addr = driver_area; 462 vq->device_addr = device_area; 463 464 return 0; 465 } 466 467 static void vduse_vq_kick(struct vduse_virtqueue *vq) 468 { 469 spin_lock(&vq->kick_lock); 470 if (!vq->ready) 471 goto unlock; 472 473 if (vq->kickfd) 474 eventfd_signal(vq->kickfd, 1); 475 else 476 vq->kicked = true; 477 unlock: 478 spin_unlock(&vq->kick_lock); 479 } 480 481 static void vduse_vq_kick_work(struct work_struct *work) 482 { 483 struct vduse_virtqueue *vq = container_of(work, 484 struct vduse_virtqueue, kick); 485 486 vduse_vq_kick(vq); 487 } 488 489 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) 490 { 491 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 492 struct vduse_virtqueue *vq = &dev->vqs[idx]; 493 494 if (!eventfd_signal_allowed()) { 495 schedule_work(&vq->kick); 496 return; 497 } 498 vduse_vq_kick(vq); 499 } 500 501 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, 502 struct vdpa_callback *cb) 503 { 504 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 505 struct vduse_virtqueue *vq = &dev->vqs[idx]; 506 507 spin_lock(&vq->irq_lock); 508 vq->cb.callback = cb->callback; 509 vq->cb.private = cb->private; 510 spin_unlock(&vq->irq_lock); 511 } 512 513 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) 514 { 515 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 516 struct vduse_virtqueue *vq = &dev->vqs[idx]; 517 518 vq->num = num; 519 } 520 521 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, 522 u16 idx, bool ready) 523 { 524 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 525 struct vduse_virtqueue *vq = &dev->vqs[idx]; 526 527 vq->ready = ready; 528 } 529 530 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) 531 { 532 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 533 struct vduse_virtqueue *vq = &dev->vqs[idx]; 534 535 return vq->ready; 536 } 537 538 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, 539 const struct vdpa_vq_state *state) 540 { 541 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 542 struct vduse_virtqueue *vq = &dev->vqs[idx]; 543 544 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { 545 vq->state.packed.last_avail_counter = 546 state->packed.last_avail_counter; 547 vq->state.packed.last_avail_idx = state->packed.last_avail_idx; 548 vq->state.packed.last_used_counter = 549 state->packed.last_used_counter; 550 vq->state.packed.last_used_idx = state->packed.last_used_idx; 551 } else 552 vq->state.split.avail_index = state->split.avail_index; 553 554 return 0; 555 } 556 557 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, 558 struct vdpa_vq_state *state) 559 { 560 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 561 struct vduse_virtqueue *vq = &dev->vqs[idx]; 562 563 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) 564 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); 565 566 return vduse_dev_get_vq_state_split(dev, vq, &state->split); 567 } 568 569 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa) 570 { 571 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 572 573 return dev->vq_align; 574 } 575 576 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa) 577 { 578 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 579 580 return dev->device_features; 581 } 582 583 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) 584 { 585 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 586 587 dev->driver_features = features; 588 return 0; 589 } 590 591 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa) 592 { 593 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 594 595 return dev->driver_features; 596 } 597 598 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa, 599 struct vdpa_callback *cb) 600 { 601 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 602 603 spin_lock(&dev->irq_lock); 604 dev->config_cb.callback = cb->callback; 605 dev->config_cb.private = cb->private; 606 spin_unlock(&dev->irq_lock); 607 } 608 609 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa) 610 { 611 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 612 u16 num_max = 0; 613 int i; 614 615 for (i = 0; i < dev->vq_num; i++) 616 if (num_max < dev->vqs[i].num_max) 617 num_max = dev->vqs[i].num_max; 618 619 return num_max; 620 } 621 622 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa) 623 { 624 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 625 626 return dev->device_id; 627 } 628 629 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa) 630 { 631 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 632 633 return dev->vendor_id; 634 } 635 636 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) 637 { 638 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 639 640 return dev->status; 641 } 642 643 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) 644 { 645 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 646 647 if (vduse_dev_set_status(dev, status)) 648 return; 649 650 dev->status = status; 651 } 652 653 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa) 654 { 655 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 656 657 return dev->config_size; 658 } 659 660 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, 661 void *buf, unsigned int len) 662 { 663 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 664 665 if (offset > dev->config_size || 666 len > dev->config_size - offset) 667 return; 668 669 memcpy(buf, dev->config + offset, len); 670 } 671 672 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset, 673 const void *buf, unsigned int len) 674 { 675 /* Now we only support read-only configuration space */ 676 } 677 678 static int vduse_vdpa_reset(struct vdpa_device *vdpa) 679 { 680 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 681 int ret = vduse_dev_set_status(dev, 0); 682 683 vduse_dev_reset(dev); 684 685 return ret; 686 } 687 688 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa) 689 { 690 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 691 692 return dev->generation; 693 } 694 695 static int vduse_vdpa_set_map(struct vdpa_device *vdpa, 696 unsigned int asid, 697 struct vhost_iotlb *iotlb) 698 { 699 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 700 int ret; 701 702 ret = vduse_domain_set_map(dev->domain, iotlb); 703 if (ret) 704 return ret; 705 706 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX); 707 if (ret) { 708 vduse_domain_clear_map(dev->domain, iotlb); 709 return ret; 710 } 711 712 return 0; 713 } 714 715 static void vduse_vdpa_free(struct vdpa_device *vdpa) 716 { 717 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 718 719 dev->vdev = NULL; 720 } 721 722 static const struct vdpa_config_ops vduse_vdpa_config_ops = { 723 .set_vq_address = vduse_vdpa_set_vq_address, 724 .kick_vq = vduse_vdpa_kick_vq, 725 .set_vq_cb = vduse_vdpa_set_vq_cb, 726 .set_vq_num = vduse_vdpa_set_vq_num, 727 .set_vq_ready = vduse_vdpa_set_vq_ready, 728 .get_vq_ready = vduse_vdpa_get_vq_ready, 729 .set_vq_state = vduse_vdpa_set_vq_state, 730 .get_vq_state = vduse_vdpa_get_vq_state, 731 .get_vq_align = vduse_vdpa_get_vq_align, 732 .get_device_features = vduse_vdpa_get_device_features, 733 .set_driver_features = vduse_vdpa_set_driver_features, 734 .get_driver_features = vduse_vdpa_get_driver_features, 735 .set_config_cb = vduse_vdpa_set_config_cb, 736 .get_vq_num_max = vduse_vdpa_get_vq_num_max, 737 .get_device_id = vduse_vdpa_get_device_id, 738 .get_vendor_id = vduse_vdpa_get_vendor_id, 739 .get_status = vduse_vdpa_get_status, 740 .set_status = vduse_vdpa_set_status, 741 .get_config_size = vduse_vdpa_get_config_size, 742 .get_config = vduse_vdpa_get_config, 743 .set_config = vduse_vdpa_set_config, 744 .get_generation = vduse_vdpa_get_generation, 745 .reset = vduse_vdpa_reset, 746 .set_map = vduse_vdpa_set_map, 747 .free = vduse_vdpa_free, 748 }; 749 750 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, 751 unsigned long offset, size_t size, 752 enum dma_data_direction dir, 753 unsigned long attrs) 754 { 755 struct vduse_dev *vdev = dev_to_vduse(dev); 756 struct vduse_iova_domain *domain = vdev->domain; 757 758 return vduse_domain_map_page(domain, page, offset, size, dir, attrs); 759 } 760 761 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr, 762 size_t size, enum dma_data_direction dir, 763 unsigned long attrs) 764 { 765 struct vduse_dev *vdev = dev_to_vduse(dev); 766 struct vduse_iova_domain *domain = vdev->domain; 767 768 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); 769 } 770 771 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, 772 dma_addr_t *dma_addr, gfp_t flag, 773 unsigned long attrs) 774 { 775 struct vduse_dev *vdev = dev_to_vduse(dev); 776 struct vduse_iova_domain *domain = vdev->domain; 777 unsigned long iova; 778 void *addr; 779 780 *dma_addr = DMA_MAPPING_ERROR; 781 addr = vduse_domain_alloc_coherent(domain, size, 782 (dma_addr_t *)&iova, flag, attrs); 783 if (!addr) 784 return NULL; 785 786 *dma_addr = (dma_addr_t)iova; 787 788 return addr; 789 } 790 791 static void vduse_dev_free_coherent(struct device *dev, size_t size, 792 void *vaddr, dma_addr_t dma_addr, 793 unsigned long attrs) 794 { 795 struct vduse_dev *vdev = dev_to_vduse(dev); 796 struct vduse_iova_domain *domain = vdev->domain; 797 798 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); 799 } 800 801 static size_t vduse_dev_max_mapping_size(struct device *dev) 802 { 803 struct vduse_dev *vdev = dev_to_vduse(dev); 804 struct vduse_iova_domain *domain = vdev->domain; 805 806 return domain->bounce_size; 807 } 808 809 static const struct dma_map_ops vduse_dev_dma_ops = { 810 .map_page = vduse_dev_map_page, 811 .unmap_page = vduse_dev_unmap_page, 812 .alloc = vduse_dev_alloc_coherent, 813 .free = vduse_dev_free_coherent, 814 .max_mapping_size = vduse_dev_max_mapping_size, 815 }; 816 817 static unsigned int perm_to_file_flags(u8 perm) 818 { 819 unsigned int flags = 0; 820 821 switch (perm) { 822 case VDUSE_ACCESS_WO: 823 flags |= O_WRONLY; 824 break; 825 case VDUSE_ACCESS_RO: 826 flags |= O_RDONLY; 827 break; 828 case VDUSE_ACCESS_RW: 829 flags |= O_RDWR; 830 break; 831 default: 832 WARN(1, "invalidate vhost IOTLB permission\n"); 833 break; 834 } 835 836 return flags; 837 } 838 839 static int vduse_kickfd_setup(struct vduse_dev *dev, 840 struct vduse_vq_eventfd *eventfd) 841 { 842 struct eventfd_ctx *ctx = NULL; 843 struct vduse_virtqueue *vq; 844 u32 index; 845 846 if (eventfd->index >= dev->vq_num) 847 return -EINVAL; 848 849 index = array_index_nospec(eventfd->index, dev->vq_num); 850 vq = &dev->vqs[index]; 851 if (eventfd->fd >= 0) { 852 ctx = eventfd_ctx_fdget(eventfd->fd); 853 if (IS_ERR(ctx)) 854 return PTR_ERR(ctx); 855 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN) 856 return 0; 857 858 spin_lock(&vq->kick_lock); 859 if (vq->kickfd) 860 eventfd_ctx_put(vq->kickfd); 861 vq->kickfd = ctx; 862 if (vq->ready && vq->kicked && vq->kickfd) { 863 eventfd_signal(vq->kickfd, 1); 864 vq->kicked = false; 865 } 866 spin_unlock(&vq->kick_lock); 867 868 return 0; 869 } 870 871 static bool vduse_dev_is_ready(struct vduse_dev *dev) 872 { 873 int i; 874 875 for (i = 0; i < dev->vq_num; i++) 876 if (!dev->vqs[i].num_max) 877 return false; 878 879 return true; 880 } 881 882 static void vduse_dev_irq_inject(struct work_struct *work) 883 { 884 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); 885 886 spin_lock_irq(&dev->irq_lock); 887 if (dev->config_cb.callback) 888 dev->config_cb.callback(dev->config_cb.private); 889 spin_unlock_irq(&dev->irq_lock); 890 } 891 892 static void vduse_vq_irq_inject(struct work_struct *work) 893 { 894 struct vduse_virtqueue *vq = container_of(work, 895 struct vduse_virtqueue, inject); 896 897 spin_lock_irq(&vq->irq_lock); 898 if (vq->ready && vq->cb.callback) 899 vq->cb.callback(vq->cb.private); 900 spin_unlock_irq(&vq->irq_lock); 901 } 902 903 static int vduse_dev_queue_irq_work(struct vduse_dev *dev, 904 struct work_struct *irq_work) 905 { 906 int ret = -EINVAL; 907 908 down_read(&dev->rwsem); 909 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 910 goto unlock; 911 912 ret = 0; 913 queue_work(vduse_irq_wq, irq_work); 914 unlock: 915 up_read(&dev->rwsem); 916 917 return ret; 918 } 919 920 static long vduse_dev_ioctl(struct file *file, unsigned int cmd, 921 unsigned long arg) 922 { 923 struct vduse_dev *dev = file->private_data; 924 void __user *argp = (void __user *)arg; 925 int ret; 926 927 if (unlikely(dev->broken)) 928 return -EPERM; 929 930 switch (cmd) { 931 case VDUSE_IOTLB_GET_FD: { 932 struct vduse_iotlb_entry entry; 933 struct vhost_iotlb_map *map; 934 struct vdpa_map_file *map_file; 935 struct vduse_iova_domain *domain = dev->domain; 936 struct file *f = NULL; 937 938 ret = -EFAULT; 939 if (copy_from_user(&entry, argp, sizeof(entry))) 940 break; 941 942 ret = -EINVAL; 943 if (entry.start > entry.last) 944 break; 945 946 spin_lock(&domain->iotlb_lock); 947 map = vhost_iotlb_itree_first(domain->iotlb, 948 entry.start, entry.last); 949 if (map) { 950 map_file = (struct vdpa_map_file *)map->opaque; 951 f = get_file(map_file->file); 952 entry.offset = map_file->offset; 953 entry.start = map->start; 954 entry.last = map->last; 955 entry.perm = map->perm; 956 } 957 spin_unlock(&domain->iotlb_lock); 958 ret = -EINVAL; 959 if (!f) 960 break; 961 962 ret = -EFAULT; 963 if (copy_to_user(argp, &entry, sizeof(entry))) { 964 fput(f); 965 break; 966 } 967 ret = receive_fd(f, perm_to_file_flags(entry.perm)); 968 fput(f); 969 break; 970 } 971 case VDUSE_DEV_GET_FEATURES: 972 /* 973 * Just mirror what driver wrote here. 974 * The driver is expected to check FEATURE_OK later. 975 */ 976 ret = put_user(dev->driver_features, (u64 __user *)argp); 977 break; 978 case VDUSE_DEV_SET_CONFIG: { 979 struct vduse_config_data config; 980 unsigned long size = offsetof(struct vduse_config_data, 981 buffer); 982 983 ret = -EFAULT; 984 if (copy_from_user(&config, argp, size)) 985 break; 986 987 ret = -EINVAL; 988 if (config.offset > dev->config_size || 989 config.length == 0 || 990 config.length > dev->config_size - config.offset) 991 break; 992 993 ret = -EFAULT; 994 if (copy_from_user(dev->config + config.offset, argp + size, 995 config.length)) 996 break; 997 998 ret = 0; 999 break; 1000 } 1001 case VDUSE_DEV_INJECT_CONFIG_IRQ: 1002 ret = vduse_dev_queue_irq_work(dev, &dev->inject); 1003 break; 1004 case VDUSE_VQ_SETUP: { 1005 struct vduse_vq_config config; 1006 u32 index; 1007 1008 ret = -EFAULT; 1009 if (copy_from_user(&config, argp, sizeof(config))) 1010 break; 1011 1012 ret = -EINVAL; 1013 if (config.index >= dev->vq_num) 1014 break; 1015 1016 if (!is_mem_zero((const char *)config.reserved, 1017 sizeof(config.reserved))) 1018 break; 1019 1020 index = array_index_nospec(config.index, dev->vq_num); 1021 dev->vqs[index].num_max = config.max_size; 1022 ret = 0; 1023 break; 1024 } 1025 case VDUSE_VQ_GET_INFO: { 1026 struct vduse_vq_info vq_info; 1027 struct vduse_virtqueue *vq; 1028 u32 index; 1029 1030 ret = -EFAULT; 1031 if (copy_from_user(&vq_info, argp, sizeof(vq_info))) 1032 break; 1033 1034 ret = -EINVAL; 1035 if (vq_info.index >= dev->vq_num) 1036 break; 1037 1038 index = array_index_nospec(vq_info.index, dev->vq_num); 1039 vq = &dev->vqs[index]; 1040 vq_info.desc_addr = vq->desc_addr; 1041 vq_info.driver_addr = vq->driver_addr; 1042 vq_info.device_addr = vq->device_addr; 1043 vq_info.num = vq->num; 1044 1045 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { 1046 vq_info.packed.last_avail_counter = 1047 vq->state.packed.last_avail_counter; 1048 vq_info.packed.last_avail_idx = 1049 vq->state.packed.last_avail_idx; 1050 vq_info.packed.last_used_counter = 1051 vq->state.packed.last_used_counter; 1052 vq_info.packed.last_used_idx = 1053 vq->state.packed.last_used_idx; 1054 } else 1055 vq_info.split.avail_index = 1056 vq->state.split.avail_index; 1057 1058 vq_info.ready = vq->ready; 1059 1060 ret = -EFAULT; 1061 if (copy_to_user(argp, &vq_info, sizeof(vq_info))) 1062 break; 1063 1064 ret = 0; 1065 break; 1066 } 1067 case VDUSE_VQ_SETUP_KICKFD: { 1068 struct vduse_vq_eventfd eventfd; 1069 1070 ret = -EFAULT; 1071 if (copy_from_user(&eventfd, argp, sizeof(eventfd))) 1072 break; 1073 1074 ret = vduse_kickfd_setup(dev, &eventfd); 1075 break; 1076 } 1077 case VDUSE_VQ_INJECT_IRQ: { 1078 u32 index; 1079 1080 ret = -EFAULT; 1081 if (get_user(index, (u32 __user *)argp)) 1082 break; 1083 1084 ret = -EINVAL; 1085 if (index >= dev->vq_num) 1086 break; 1087 1088 index = array_index_nospec(index, dev->vq_num); 1089 ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); 1090 break; 1091 } 1092 default: 1093 ret = -ENOIOCTLCMD; 1094 break; 1095 } 1096 1097 return ret; 1098 } 1099 1100 static int vduse_dev_release(struct inode *inode, struct file *file) 1101 { 1102 struct vduse_dev *dev = file->private_data; 1103 1104 spin_lock(&dev->msg_lock); 1105 /* Make sure the inflight messages can processed after reconncection */ 1106 list_splice_init(&dev->recv_list, &dev->send_list); 1107 spin_unlock(&dev->msg_lock); 1108 dev->connected = false; 1109 1110 return 0; 1111 } 1112 1113 static struct vduse_dev *vduse_dev_get_from_minor(int minor) 1114 { 1115 struct vduse_dev *dev; 1116 1117 mutex_lock(&vduse_lock); 1118 dev = idr_find(&vduse_idr, minor); 1119 mutex_unlock(&vduse_lock); 1120 1121 return dev; 1122 } 1123 1124 static int vduse_dev_open(struct inode *inode, struct file *file) 1125 { 1126 int ret; 1127 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode)); 1128 1129 if (!dev) 1130 return -ENODEV; 1131 1132 ret = -EBUSY; 1133 mutex_lock(&dev->lock); 1134 if (dev->connected) 1135 goto unlock; 1136 1137 ret = 0; 1138 dev->connected = true; 1139 file->private_data = dev; 1140 unlock: 1141 mutex_unlock(&dev->lock); 1142 1143 return ret; 1144 } 1145 1146 static const struct file_operations vduse_dev_fops = { 1147 .owner = THIS_MODULE, 1148 .open = vduse_dev_open, 1149 .release = vduse_dev_release, 1150 .read_iter = vduse_dev_read_iter, 1151 .write_iter = vduse_dev_write_iter, 1152 .poll = vduse_dev_poll, 1153 .unlocked_ioctl = vduse_dev_ioctl, 1154 .compat_ioctl = compat_ptr_ioctl, 1155 .llseek = noop_llseek, 1156 }; 1157 1158 static struct vduse_dev *vduse_dev_create(void) 1159 { 1160 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); 1161 1162 if (!dev) 1163 return NULL; 1164 1165 mutex_init(&dev->lock); 1166 spin_lock_init(&dev->msg_lock); 1167 INIT_LIST_HEAD(&dev->send_list); 1168 INIT_LIST_HEAD(&dev->recv_list); 1169 spin_lock_init(&dev->irq_lock); 1170 init_rwsem(&dev->rwsem); 1171 1172 INIT_WORK(&dev->inject, vduse_dev_irq_inject); 1173 init_waitqueue_head(&dev->waitq); 1174 1175 return dev; 1176 } 1177 1178 static void vduse_dev_destroy(struct vduse_dev *dev) 1179 { 1180 kfree(dev); 1181 } 1182 1183 static struct vduse_dev *vduse_find_dev(const char *name) 1184 { 1185 struct vduse_dev *dev; 1186 int id; 1187 1188 idr_for_each_entry(&vduse_idr, dev, id) 1189 if (!strcmp(dev->name, name)) 1190 return dev; 1191 1192 return NULL; 1193 } 1194 1195 static int vduse_destroy_dev(char *name) 1196 { 1197 struct vduse_dev *dev = vduse_find_dev(name); 1198 1199 if (!dev) 1200 return -EINVAL; 1201 1202 mutex_lock(&dev->lock); 1203 if (dev->vdev || dev->connected) { 1204 mutex_unlock(&dev->lock); 1205 return -EBUSY; 1206 } 1207 dev->connected = true; 1208 mutex_unlock(&dev->lock); 1209 1210 vduse_dev_reset(dev); 1211 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); 1212 idr_remove(&vduse_idr, dev->minor); 1213 kvfree(dev->config); 1214 kfree(dev->vqs); 1215 vduse_domain_destroy(dev->domain); 1216 kfree(dev->name); 1217 vduse_dev_destroy(dev); 1218 module_put(THIS_MODULE); 1219 1220 return 0; 1221 } 1222 1223 static bool device_is_allowed(u32 device_id) 1224 { 1225 int i; 1226 1227 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++) 1228 if (allowed_device_id[i] == device_id) 1229 return true; 1230 1231 return false; 1232 } 1233 1234 static bool features_is_valid(u64 features) 1235 { 1236 if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) 1237 return false; 1238 1239 /* Now we only support read-only configuration space */ 1240 if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE)) 1241 return false; 1242 1243 return true; 1244 } 1245 1246 static bool vduse_validate_config(struct vduse_dev_config *config) 1247 { 1248 if (!is_mem_zero((const char *)config->reserved, 1249 sizeof(config->reserved))) 1250 return false; 1251 1252 if (config->vq_align > PAGE_SIZE) 1253 return false; 1254 1255 if (config->config_size > PAGE_SIZE) 1256 return false; 1257 1258 if (!device_is_allowed(config->device_id)) 1259 return false; 1260 1261 if (!features_is_valid(config->features)) 1262 return false; 1263 1264 return true; 1265 } 1266 1267 static ssize_t msg_timeout_show(struct device *device, 1268 struct device_attribute *attr, char *buf) 1269 { 1270 struct vduse_dev *dev = dev_get_drvdata(device); 1271 1272 return sysfs_emit(buf, "%u\n", dev->msg_timeout); 1273 } 1274 1275 static ssize_t msg_timeout_store(struct device *device, 1276 struct device_attribute *attr, 1277 const char *buf, size_t count) 1278 { 1279 struct vduse_dev *dev = dev_get_drvdata(device); 1280 int ret; 1281 1282 ret = kstrtouint(buf, 10, &dev->msg_timeout); 1283 if (ret < 0) 1284 return ret; 1285 1286 return count; 1287 } 1288 1289 static DEVICE_ATTR_RW(msg_timeout); 1290 1291 static struct attribute *vduse_dev_attrs[] = { 1292 &dev_attr_msg_timeout.attr, 1293 NULL 1294 }; 1295 1296 ATTRIBUTE_GROUPS(vduse_dev); 1297 1298 static int vduse_create_dev(struct vduse_dev_config *config, 1299 void *config_buf, u64 api_version) 1300 { 1301 int i, ret; 1302 struct vduse_dev *dev; 1303 1304 ret = -EEXIST; 1305 if (vduse_find_dev(config->name)) 1306 goto err; 1307 1308 ret = -ENOMEM; 1309 dev = vduse_dev_create(); 1310 if (!dev) 1311 goto err; 1312 1313 dev->api_version = api_version; 1314 dev->device_features = config->features; 1315 dev->device_id = config->device_id; 1316 dev->vendor_id = config->vendor_id; 1317 dev->name = kstrdup(config->name, GFP_KERNEL); 1318 if (!dev->name) 1319 goto err_str; 1320 1321 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, 1322 VDUSE_BOUNCE_SIZE); 1323 if (!dev->domain) 1324 goto err_domain; 1325 1326 dev->config = config_buf; 1327 dev->config_size = config->config_size; 1328 dev->vq_align = config->vq_align; 1329 dev->vq_num = config->vq_num; 1330 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); 1331 if (!dev->vqs) 1332 goto err_vqs; 1333 1334 for (i = 0; i < dev->vq_num; i++) { 1335 dev->vqs[i].index = i; 1336 INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject); 1337 INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work); 1338 spin_lock_init(&dev->vqs[i].kick_lock); 1339 spin_lock_init(&dev->vqs[i].irq_lock); 1340 } 1341 1342 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); 1343 if (ret < 0) 1344 goto err_idr; 1345 1346 dev->minor = ret; 1347 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; 1348 dev->dev = device_create(vduse_class, NULL, 1349 MKDEV(MAJOR(vduse_major), dev->minor), 1350 dev, "%s", config->name); 1351 if (IS_ERR(dev->dev)) { 1352 ret = PTR_ERR(dev->dev); 1353 goto err_dev; 1354 } 1355 __module_get(THIS_MODULE); 1356 1357 return 0; 1358 err_dev: 1359 idr_remove(&vduse_idr, dev->minor); 1360 err_idr: 1361 kfree(dev->vqs); 1362 err_vqs: 1363 vduse_domain_destroy(dev->domain); 1364 err_domain: 1365 kfree(dev->name); 1366 err_str: 1367 vduse_dev_destroy(dev); 1368 err: 1369 return ret; 1370 } 1371 1372 static long vduse_ioctl(struct file *file, unsigned int cmd, 1373 unsigned long arg) 1374 { 1375 int ret; 1376 void __user *argp = (void __user *)arg; 1377 struct vduse_control *control = file->private_data; 1378 1379 mutex_lock(&vduse_lock); 1380 switch (cmd) { 1381 case VDUSE_GET_API_VERSION: 1382 ret = put_user(control->api_version, (u64 __user *)argp); 1383 break; 1384 case VDUSE_SET_API_VERSION: { 1385 u64 api_version; 1386 1387 ret = -EFAULT; 1388 if (get_user(api_version, (u64 __user *)argp)) 1389 break; 1390 1391 ret = -EINVAL; 1392 if (api_version > VDUSE_API_VERSION) 1393 break; 1394 1395 ret = 0; 1396 control->api_version = api_version; 1397 break; 1398 } 1399 case VDUSE_CREATE_DEV: { 1400 struct vduse_dev_config config; 1401 unsigned long size = offsetof(struct vduse_dev_config, config); 1402 void *buf; 1403 1404 ret = -EFAULT; 1405 if (copy_from_user(&config, argp, size)) 1406 break; 1407 1408 ret = -EINVAL; 1409 if (vduse_validate_config(&config) == false) 1410 break; 1411 1412 buf = vmemdup_user(argp + size, config.config_size); 1413 if (IS_ERR(buf)) { 1414 ret = PTR_ERR(buf); 1415 break; 1416 } 1417 config.name[VDUSE_NAME_MAX - 1] = '\0'; 1418 ret = vduse_create_dev(&config, buf, control->api_version); 1419 if (ret) 1420 kvfree(buf); 1421 break; 1422 } 1423 case VDUSE_DESTROY_DEV: { 1424 char name[VDUSE_NAME_MAX]; 1425 1426 ret = -EFAULT; 1427 if (copy_from_user(name, argp, VDUSE_NAME_MAX)) 1428 break; 1429 1430 name[VDUSE_NAME_MAX - 1] = '\0'; 1431 ret = vduse_destroy_dev(name); 1432 break; 1433 } 1434 default: 1435 ret = -EINVAL; 1436 break; 1437 } 1438 mutex_unlock(&vduse_lock); 1439 1440 return ret; 1441 } 1442 1443 static int vduse_release(struct inode *inode, struct file *file) 1444 { 1445 struct vduse_control *control = file->private_data; 1446 1447 kfree(control); 1448 return 0; 1449 } 1450 1451 static int vduse_open(struct inode *inode, struct file *file) 1452 { 1453 struct vduse_control *control; 1454 1455 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL); 1456 if (!control) 1457 return -ENOMEM; 1458 1459 control->api_version = VDUSE_API_VERSION; 1460 file->private_data = control; 1461 1462 return 0; 1463 } 1464 1465 static const struct file_operations vduse_ctrl_fops = { 1466 .owner = THIS_MODULE, 1467 .open = vduse_open, 1468 .release = vduse_release, 1469 .unlocked_ioctl = vduse_ioctl, 1470 .compat_ioctl = compat_ptr_ioctl, 1471 .llseek = noop_llseek, 1472 }; 1473 1474 static char *vduse_devnode(struct device *dev, umode_t *mode) 1475 { 1476 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); 1477 } 1478 1479 static void vduse_mgmtdev_release(struct device *dev) 1480 { 1481 } 1482 1483 static struct device vduse_mgmtdev = { 1484 .init_name = "vduse", 1485 .release = vduse_mgmtdev_release, 1486 }; 1487 1488 static struct vdpa_mgmt_dev mgmt_dev; 1489 1490 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) 1491 { 1492 struct vduse_vdpa *vdev; 1493 int ret; 1494 1495 if (dev->vdev) 1496 return -EEXIST; 1497 1498 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, 1499 &vduse_vdpa_config_ops, 1, 1, name, true); 1500 if (IS_ERR(vdev)) 1501 return PTR_ERR(vdev); 1502 1503 dev->vdev = vdev; 1504 vdev->dev = dev; 1505 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask; 1506 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64)); 1507 if (ret) { 1508 put_device(&vdev->vdpa.dev); 1509 return ret; 1510 } 1511 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); 1512 vdev->vdpa.dma_dev = &vdev->vdpa.dev; 1513 vdev->vdpa.mdev = &mgmt_dev; 1514 1515 return 0; 1516 } 1517 1518 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 1519 const struct vdpa_dev_set_config *config) 1520 { 1521 struct vduse_dev *dev; 1522 int ret; 1523 1524 mutex_lock(&vduse_lock); 1525 dev = vduse_find_dev(name); 1526 if (!dev || !vduse_dev_is_ready(dev)) { 1527 mutex_unlock(&vduse_lock); 1528 return -EINVAL; 1529 } 1530 ret = vduse_dev_init_vdpa(dev, name); 1531 mutex_unlock(&vduse_lock); 1532 if (ret) 1533 return ret; 1534 1535 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); 1536 if (ret) { 1537 put_device(&dev->vdev->vdpa.dev); 1538 return ret; 1539 } 1540 1541 return 0; 1542 } 1543 1544 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) 1545 { 1546 _vdpa_unregister_device(dev); 1547 } 1548 1549 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = { 1550 .dev_add = vdpa_dev_add, 1551 .dev_del = vdpa_dev_del, 1552 }; 1553 1554 static struct virtio_device_id id_table[] = { 1555 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 1556 { 0 }, 1557 }; 1558 1559 static struct vdpa_mgmt_dev mgmt_dev = { 1560 .device = &vduse_mgmtdev, 1561 .id_table = id_table, 1562 .ops = &vdpa_dev_mgmtdev_ops, 1563 }; 1564 1565 static int vduse_mgmtdev_init(void) 1566 { 1567 int ret; 1568 1569 ret = device_register(&vduse_mgmtdev); 1570 if (ret) 1571 return ret; 1572 1573 ret = vdpa_mgmtdev_register(&mgmt_dev); 1574 if (ret) 1575 goto err; 1576 1577 return 0; 1578 err: 1579 device_unregister(&vduse_mgmtdev); 1580 return ret; 1581 } 1582 1583 static void vduse_mgmtdev_exit(void) 1584 { 1585 vdpa_mgmtdev_unregister(&mgmt_dev); 1586 device_unregister(&vduse_mgmtdev); 1587 } 1588 1589 static int vduse_init(void) 1590 { 1591 int ret; 1592 struct device *dev; 1593 1594 vduse_class = class_create(THIS_MODULE, "vduse"); 1595 if (IS_ERR(vduse_class)) 1596 return PTR_ERR(vduse_class); 1597 1598 vduse_class->devnode = vduse_devnode; 1599 vduse_class->dev_groups = vduse_dev_groups; 1600 1601 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); 1602 if (ret) 1603 goto err_chardev_region; 1604 1605 /* /dev/vduse/control */ 1606 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops); 1607 vduse_ctrl_cdev.owner = THIS_MODULE; 1608 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1); 1609 if (ret) 1610 goto err_ctrl_cdev; 1611 1612 dev = device_create(vduse_class, NULL, vduse_major, NULL, "control"); 1613 if (IS_ERR(dev)) { 1614 ret = PTR_ERR(dev); 1615 goto err_device; 1616 } 1617 1618 /* /dev/vduse/$DEVICE */ 1619 cdev_init(&vduse_cdev, &vduse_dev_fops); 1620 vduse_cdev.owner = THIS_MODULE; 1621 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1), 1622 VDUSE_DEV_MAX - 1); 1623 if (ret) 1624 goto err_cdev; 1625 1626 vduse_irq_wq = alloc_workqueue("vduse-irq", 1627 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0); 1628 if (!vduse_irq_wq) { 1629 ret = -ENOMEM; 1630 goto err_wq; 1631 } 1632 1633 ret = vduse_domain_init(); 1634 if (ret) 1635 goto err_domain; 1636 1637 ret = vduse_mgmtdev_init(); 1638 if (ret) 1639 goto err_mgmtdev; 1640 1641 return 0; 1642 err_mgmtdev: 1643 vduse_domain_exit(); 1644 err_domain: 1645 destroy_workqueue(vduse_irq_wq); 1646 err_wq: 1647 cdev_del(&vduse_cdev); 1648 err_cdev: 1649 device_destroy(vduse_class, vduse_major); 1650 err_device: 1651 cdev_del(&vduse_ctrl_cdev); 1652 err_ctrl_cdev: 1653 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); 1654 err_chardev_region: 1655 class_destroy(vduse_class); 1656 return ret; 1657 } 1658 module_init(vduse_init); 1659 1660 static void vduse_exit(void) 1661 { 1662 vduse_mgmtdev_exit(); 1663 vduse_domain_exit(); 1664 destroy_workqueue(vduse_irq_wq); 1665 cdev_del(&vduse_cdev); 1666 device_destroy(vduse_class, vduse_major); 1667 cdev_del(&vduse_ctrl_cdev); 1668 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); 1669 class_destroy(vduse_class); 1670 } 1671 module_exit(vduse_exit); 1672 1673 MODULE_LICENSE(DRV_LICENSE); 1674 MODULE_AUTHOR(DRV_AUTHOR); 1675 MODULE_DESCRIPTION(DRV_DESC); 1676