1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * VDUSE: vDPA Device in Userspace 4 * 5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. 6 * 7 * Author: Xie Yongji <xieyongji@bytedance.com> 8 * 9 */ 10 11 #include "linux/virtio_net.h" 12 #include <linux/init.h> 13 #include <linux/module.h> 14 #include <linux/cdev.h> 15 #include <linux/device.h> 16 #include <linux/eventfd.h> 17 #include <linux/slab.h> 18 #include <linux/wait.h> 19 #include <linux/dma-map-ops.h> 20 #include <linux/poll.h> 21 #include <linux/file.h> 22 #include <linux/uio.h> 23 #include <linux/vdpa.h> 24 #include <linux/nospec.h> 25 #include <linux/vmalloc.h> 26 #include <linux/sched/mm.h> 27 #include <uapi/linux/vduse.h> 28 #include <uapi/linux/vdpa.h> 29 #include <uapi/linux/virtio_config.h> 30 #include <uapi/linux/virtio_ids.h> 31 #include <uapi/linux/virtio_blk.h> 32 #include <uapi/linux/virtio_ring.h> 33 #include <linux/mod_devicetable.h> 34 35 #include "iova_domain.h" 36 37 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>" 38 #define DRV_DESC "vDPA Device in Userspace" 39 #define DRV_LICENSE "GPL v2" 40 41 #define VDUSE_DEV_MAX (1U << MINORBITS) 42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024) 43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024) 44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) 45 /* 128 MB reserved for virtqueue creation */ 46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024) 47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30 48 49 #define IRQ_UNBOUND -1 50 51 struct vduse_virtqueue { 52 u16 index; 53 u16 num_max; 54 u32 num; 55 u64 desc_addr; 56 u64 driver_addr; 57 u64 device_addr; 58 struct vdpa_vq_state state; 59 bool ready; 60 bool kicked; 61 spinlock_t kick_lock; 62 spinlock_t irq_lock; 63 struct eventfd_ctx *kickfd; 64 struct vdpa_callback cb; 65 struct work_struct inject; 66 struct work_struct kick; 67 int irq_effective_cpu; 68 struct cpumask irq_affinity; 69 struct kobject kobj; 70 }; 71 72 struct vduse_dev; 73 74 struct vduse_vdpa { 75 struct vdpa_device vdpa; 76 struct vduse_dev *dev; 77 }; 78 79 struct vduse_umem { 80 unsigned long iova; 81 unsigned long npages; 82 struct page **pages; 83 struct mm_struct *mm; 84 }; 85 86 struct vduse_dev { 87 struct vduse_vdpa *vdev; 88 struct device *dev; 89 struct vduse_virtqueue **vqs; 90 struct vduse_iova_domain *domain; 91 char *name; 92 struct mutex lock; 93 spinlock_t msg_lock; 94 u64 msg_unique; 95 u32 msg_timeout; 96 wait_queue_head_t waitq; 97 struct list_head send_list; 98 struct list_head recv_list; 99 struct vdpa_callback config_cb; 100 struct work_struct inject; 101 spinlock_t irq_lock; 102 struct rw_semaphore rwsem; 103 int minor; 104 bool broken; 105 bool connected; 106 u64 api_version; 107 u64 device_features; 108 u64 driver_features; 109 u32 device_id; 110 u32 vendor_id; 111 u32 generation; 112 u32 config_size; 113 void *config; 114 u8 status; 115 u32 vq_num; 116 u32 vq_align; 117 struct vduse_umem *umem; 118 struct mutex mem_lock; 119 unsigned int bounce_size; 120 struct mutex domain_lock; 121 }; 122 123 struct vduse_dev_msg { 124 struct vduse_dev_request req; 125 struct vduse_dev_response resp; 126 struct list_head list; 127 wait_queue_head_t waitq; 128 bool completed; 129 }; 130 131 struct vduse_control { 132 u64 api_version; 133 }; 134 135 static DEFINE_MUTEX(vduse_lock); 136 static DEFINE_IDR(vduse_idr); 137 138 static dev_t vduse_major; 139 static struct cdev vduse_ctrl_cdev; 140 static struct cdev vduse_cdev; 141 static struct workqueue_struct *vduse_irq_wq; 142 static struct workqueue_struct *vduse_irq_bound_wq; 143 144 static u32 allowed_device_id[] = { 145 VIRTIO_ID_BLOCK, 146 VIRTIO_ID_NET, 147 VIRTIO_ID_FS, 148 }; 149 150 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) 151 { 152 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa); 153 154 return vdev->dev; 155 } 156 157 static inline struct vduse_dev *dev_to_vduse(struct device *dev) 158 { 159 struct vdpa_device *vdpa = dev_to_vdpa(dev); 160 161 return vdpa_to_vduse(vdpa); 162 } 163 164 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head, 165 uint32_t request_id) 166 { 167 struct vduse_dev_msg *msg; 168 169 list_for_each_entry(msg, head, list) { 170 if (msg->req.request_id == request_id) { 171 list_del(&msg->list); 172 return msg; 173 } 174 } 175 176 return NULL; 177 } 178 179 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head) 180 { 181 struct vduse_dev_msg *msg = NULL; 182 183 if (!list_empty(head)) { 184 msg = list_first_entry(head, struct vduse_dev_msg, list); 185 list_del(&msg->list); 186 } 187 188 return msg; 189 } 190 191 static void vduse_enqueue_msg(struct list_head *head, 192 struct vduse_dev_msg *msg) 193 { 194 list_add_tail(&msg->list, head); 195 } 196 197 static void vduse_dev_broken(struct vduse_dev *dev) 198 { 199 struct vduse_dev_msg *msg, *tmp; 200 201 if (unlikely(dev->broken)) 202 return; 203 204 list_splice_init(&dev->recv_list, &dev->send_list); 205 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) { 206 list_del(&msg->list); 207 msg->completed = 1; 208 msg->resp.result = VDUSE_REQ_RESULT_FAILED; 209 wake_up(&msg->waitq); 210 } 211 dev->broken = true; 212 wake_up(&dev->waitq); 213 } 214 215 static int vduse_dev_msg_sync(struct vduse_dev *dev, 216 struct vduse_dev_msg *msg) 217 { 218 int ret; 219 220 if (unlikely(dev->broken)) 221 return -EIO; 222 223 init_waitqueue_head(&msg->waitq); 224 spin_lock(&dev->msg_lock); 225 if (unlikely(dev->broken)) { 226 spin_unlock(&dev->msg_lock); 227 return -EIO; 228 } 229 msg->req.request_id = dev->msg_unique++; 230 vduse_enqueue_msg(&dev->send_list, msg); 231 wake_up(&dev->waitq); 232 spin_unlock(&dev->msg_lock); 233 if (dev->msg_timeout) 234 ret = wait_event_killable_timeout(msg->waitq, msg->completed, 235 (long)dev->msg_timeout * HZ); 236 else 237 ret = wait_event_killable(msg->waitq, msg->completed); 238 239 spin_lock(&dev->msg_lock); 240 if (!msg->completed) { 241 list_del(&msg->list); 242 msg->resp.result = VDUSE_REQ_RESULT_FAILED; 243 /* Mark the device as malfunction when there is a timeout */ 244 if (!ret) 245 vduse_dev_broken(dev); 246 } 247 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO; 248 spin_unlock(&dev->msg_lock); 249 250 return ret; 251 } 252 253 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev, 254 struct vduse_virtqueue *vq, 255 struct vdpa_vq_state_packed *packed) 256 { 257 struct vduse_dev_msg msg = { 0 }; 258 int ret; 259 260 msg.req.type = VDUSE_GET_VQ_STATE; 261 msg.req.vq_state.index = vq->index; 262 263 ret = vduse_dev_msg_sync(dev, &msg); 264 if (ret) 265 return ret; 266 267 packed->last_avail_counter = 268 msg.resp.vq_state.packed.last_avail_counter & 0x0001; 269 packed->last_avail_idx = 270 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF; 271 packed->last_used_counter = 272 msg.resp.vq_state.packed.last_used_counter & 0x0001; 273 packed->last_used_idx = 274 msg.resp.vq_state.packed.last_used_idx & 0x7FFF; 275 276 return 0; 277 } 278 279 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev, 280 struct vduse_virtqueue *vq, 281 struct vdpa_vq_state_split *split) 282 { 283 struct vduse_dev_msg msg = { 0 }; 284 int ret; 285 286 msg.req.type = VDUSE_GET_VQ_STATE; 287 msg.req.vq_state.index = vq->index; 288 289 ret = vduse_dev_msg_sync(dev, &msg); 290 if (ret) 291 return ret; 292 293 split->avail_index = msg.resp.vq_state.split.avail_index; 294 295 return 0; 296 } 297 298 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status) 299 { 300 struct vduse_dev_msg msg = { 0 }; 301 302 msg.req.type = VDUSE_SET_STATUS; 303 msg.req.s.status = status; 304 305 return vduse_dev_msg_sync(dev, &msg); 306 } 307 308 static int vduse_dev_update_iotlb(struct vduse_dev *dev, 309 u64 start, u64 last) 310 { 311 struct vduse_dev_msg msg = { 0 }; 312 313 if (last < start) 314 return -EINVAL; 315 316 msg.req.type = VDUSE_UPDATE_IOTLB; 317 msg.req.iova.start = start; 318 msg.req.iova.last = last; 319 320 return vduse_dev_msg_sync(dev, &msg); 321 } 322 323 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) 324 { 325 struct file *file = iocb->ki_filp; 326 struct vduse_dev *dev = file->private_data; 327 struct vduse_dev_msg *msg; 328 int size = sizeof(struct vduse_dev_request); 329 ssize_t ret; 330 331 if (iov_iter_count(to) < size) 332 return -EINVAL; 333 334 spin_lock(&dev->msg_lock); 335 while (1) { 336 msg = vduse_dequeue_msg(&dev->send_list); 337 if (msg) 338 break; 339 340 ret = -EAGAIN; 341 if (file->f_flags & O_NONBLOCK) 342 goto unlock; 343 344 spin_unlock(&dev->msg_lock); 345 ret = wait_event_interruptible_exclusive(dev->waitq, 346 !list_empty(&dev->send_list)); 347 if (ret) 348 return ret; 349 350 spin_lock(&dev->msg_lock); 351 } 352 spin_unlock(&dev->msg_lock); 353 ret = copy_to_iter(&msg->req, size, to); 354 spin_lock(&dev->msg_lock); 355 if (ret != size) { 356 ret = -EFAULT; 357 vduse_enqueue_msg(&dev->send_list, msg); 358 goto unlock; 359 } 360 vduse_enqueue_msg(&dev->recv_list, msg); 361 unlock: 362 spin_unlock(&dev->msg_lock); 363 364 return ret; 365 } 366 367 static bool is_mem_zero(const char *ptr, int size) 368 { 369 int i; 370 371 for (i = 0; i < size; i++) { 372 if (ptr[i]) 373 return false; 374 } 375 return true; 376 } 377 378 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from) 379 { 380 struct file *file = iocb->ki_filp; 381 struct vduse_dev *dev = file->private_data; 382 struct vduse_dev_response resp; 383 struct vduse_dev_msg *msg; 384 size_t ret; 385 386 ret = copy_from_iter(&resp, sizeof(resp), from); 387 if (ret != sizeof(resp)) 388 return -EINVAL; 389 390 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved))) 391 return -EINVAL; 392 393 spin_lock(&dev->msg_lock); 394 msg = vduse_find_msg(&dev->recv_list, resp.request_id); 395 if (!msg) { 396 ret = -ENOENT; 397 goto unlock; 398 } 399 400 memcpy(&msg->resp, &resp, sizeof(resp)); 401 msg->completed = 1; 402 wake_up(&msg->waitq); 403 unlock: 404 spin_unlock(&dev->msg_lock); 405 406 return ret; 407 } 408 409 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait) 410 { 411 struct vduse_dev *dev = file->private_data; 412 __poll_t mask = 0; 413 414 poll_wait(file, &dev->waitq, wait); 415 416 spin_lock(&dev->msg_lock); 417 418 if (unlikely(dev->broken)) 419 mask |= EPOLLERR; 420 if (!list_empty(&dev->send_list)) 421 mask |= EPOLLIN | EPOLLRDNORM; 422 if (!list_empty(&dev->recv_list)) 423 mask |= EPOLLOUT | EPOLLWRNORM; 424 425 spin_unlock(&dev->msg_lock); 426 427 return mask; 428 } 429 430 static void vduse_dev_reset(struct vduse_dev *dev) 431 { 432 int i; 433 struct vduse_iova_domain *domain = dev->domain; 434 435 /* The coherent mappings are handled in vduse_dev_free_coherent() */ 436 if (domain && domain->bounce_map) 437 vduse_domain_reset_bounce_map(domain); 438 439 down_write(&dev->rwsem); 440 441 dev->status = 0; 442 dev->driver_features = 0; 443 dev->generation++; 444 spin_lock(&dev->irq_lock); 445 dev->config_cb.callback = NULL; 446 dev->config_cb.private = NULL; 447 spin_unlock(&dev->irq_lock); 448 flush_work(&dev->inject); 449 450 for (i = 0; i < dev->vq_num; i++) { 451 struct vduse_virtqueue *vq = dev->vqs[i]; 452 453 vq->ready = false; 454 vq->desc_addr = 0; 455 vq->driver_addr = 0; 456 vq->device_addr = 0; 457 vq->num = 0; 458 memset(&vq->state, 0, sizeof(vq->state)); 459 460 spin_lock(&vq->kick_lock); 461 vq->kicked = false; 462 if (vq->kickfd) 463 eventfd_ctx_put(vq->kickfd); 464 vq->kickfd = NULL; 465 spin_unlock(&vq->kick_lock); 466 467 spin_lock(&vq->irq_lock); 468 vq->cb.callback = NULL; 469 vq->cb.private = NULL; 470 vq->cb.trigger = NULL; 471 spin_unlock(&vq->irq_lock); 472 flush_work(&vq->inject); 473 flush_work(&vq->kick); 474 } 475 476 up_write(&dev->rwsem); 477 } 478 479 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, 480 u64 desc_area, u64 driver_area, 481 u64 device_area) 482 { 483 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 484 struct vduse_virtqueue *vq = dev->vqs[idx]; 485 486 vq->desc_addr = desc_area; 487 vq->driver_addr = driver_area; 488 vq->device_addr = device_area; 489 490 return 0; 491 } 492 493 static void vduse_vq_kick(struct vduse_virtqueue *vq) 494 { 495 spin_lock(&vq->kick_lock); 496 if (!vq->ready) 497 goto unlock; 498 499 if (vq->kickfd) 500 eventfd_signal(vq->kickfd); 501 else 502 vq->kicked = true; 503 unlock: 504 spin_unlock(&vq->kick_lock); 505 } 506 507 static void vduse_vq_kick_work(struct work_struct *work) 508 { 509 struct vduse_virtqueue *vq = container_of(work, 510 struct vduse_virtqueue, kick); 511 512 vduse_vq_kick(vq); 513 } 514 515 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) 516 { 517 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 518 struct vduse_virtqueue *vq = dev->vqs[idx]; 519 520 if (!eventfd_signal_allowed()) { 521 schedule_work(&vq->kick); 522 return; 523 } 524 vduse_vq_kick(vq); 525 } 526 527 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, 528 struct vdpa_callback *cb) 529 { 530 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 531 struct vduse_virtqueue *vq = dev->vqs[idx]; 532 533 spin_lock(&vq->irq_lock); 534 vq->cb.callback = cb->callback; 535 vq->cb.private = cb->private; 536 vq->cb.trigger = cb->trigger; 537 spin_unlock(&vq->irq_lock); 538 } 539 540 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) 541 { 542 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 543 struct vduse_virtqueue *vq = dev->vqs[idx]; 544 545 vq->num = num; 546 } 547 548 static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx) 549 { 550 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 551 struct vduse_virtqueue *vq = dev->vqs[idx]; 552 553 if (vq->num) 554 return vq->num; 555 else 556 return vq->num_max; 557 } 558 559 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, 560 u16 idx, bool ready) 561 { 562 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 563 struct vduse_virtqueue *vq = dev->vqs[idx]; 564 565 vq->ready = ready; 566 } 567 568 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) 569 { 570 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 571 struct vduse_virtqueue *vq = dev->vqs[idx]; 572 573 return vq->ready; 574 } 575 576 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, 577 const struct vdpa_vq_state *state) 578 { 579 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 580 struct vduse_virtqueue *vq = dev->vqs[idx]; 581 582 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { 583 vq->state.packed.last_avail_counter = 584 state->packed.last_avail_counter; 585 vq->state.packed.last_avail_idx = state->packed.last_avail_idx; 586 vq->state.packed.last_used_counter = 587 state->packed.last_used_counter; 588 vq->state.packed.last_used_idx = state->packed.last_used_idx; 589 } else 590 vq->state.split.avail_index = state->split.avail_index; 591 592 return 0; 593 } 594 595 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, 596 struct vdpa_vq_state *state) 597 { 598 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 599 struct vduse_virtqueue *vq = dev->vqs[idx]; 600 601 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) 602 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); 603 604 return vduse_dev_get_vq_state_split(dev, vq, &state->split); 605 } 606 607 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa) 608 { 609 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 610 611 return dev->vq_align; 612 } 613 614 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa) 615 { 616 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 617 618 return dev->device_features; 619 } 620 621 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features) 622 { 623 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 624 625 dev->driver_features = features; 626 return 0; 627 } 628 629 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa) 630 { 631 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 632 633 return dev->driver_features; 634 } 635 636 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa, 637 struct vdpa_callback *cb) 638 { 639 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 640 641 spin_lock(&dev->irq_lock); 642 dev->config_cb.callback = cb->callback; 643 dev->config_cb.private = cb->private; 644 spin_unlock(&dev->irq_lock); 645 } 646 647 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa) 648 { 649 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 650 u16 num_max = 0; 651 int i; 652 653 for (i = 0; i < dev->vq_num; i++) 654 if (num_max < dev->vqs[i]->num_max) 655 num_max = dev->vqs[i]->num_max; 656 657 return num_max; 658 } 659 660 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa) 661 { 662 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 663 664 return dev->device_id; 665 } 666 667 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa) 668 { 669 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 670 671 return dev->vendor_id; 672 } 673 674 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) 675 { 676 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 677 678 return dev->status; 679 } 680 681 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) 682 { 683 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 684 685 if (vduse_dev_set_status(dev, status)) 686 return; 687 688 dev->status = status; 689 } 690 691 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa) 692 { 693 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 694 695 return dev->config_size; 696 } 697 698 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, 699 void *buf, unsigned int len) 700 { 701 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 702 703 /* Initialize the buffer in case of partial copy. */ 704 memset(buf, 0, len); 705 706 if (offset > dev->config_size) 707 return; 708 709 if (len > dev->config_size - offset) 710 len = dev->config_size - offset; 711 712 memcpy(buf, dev->config + offset, len); 713 } 714 715 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset, 716 const void *buf, unsigned int len) 717 { 718 /* Now we only support read-only configuration space */ 719 } 720 721 static int vduse_vdpa_reset(struct vdpa_device *vdpa) 722 { 723 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 724 int ret = vduse_dev_set_status(dev, 0); 725 726 vduse_dev_reset(dev); 727 728 return ret; 729 } 730 731 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa) 732 { 733 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 734 735 return dev->generation; 736 } 737 738 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx, 739 const struct cpumask *cpu_mask) 740 { 741 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 742 743 if (cpu_mask) 744 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask); 745 else 746 cpumask_setall(&dev->vqs[idx]->irq_affinity); 747 748 return 0; 749 } 750 751 static const struct cpumask * 752 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx) 753 { 754 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 755 756 return &dev->vqs[idx]->irq_affinity; 757 } 758 759 static int vduse_vdpa_set_map(struct vdpa_device *vdpa, 760 unsigned int asid, 761 struct vhost_iotlb *iotlb) 762 { 763 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 764 int ret; 765 766 ret = vduse_domain_set_map(dev->domain, iotlb); 767 if (ret) 768 return ret; 769 770 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX); 771 if (ret) { 772 vduse_domain_clear_map(dev->domain, iotlb); 773 return ret; 774 } 775 776 return 0; 777 } 778 779 static void vduse_vdpa_free(struct vdpa_device *vdpa) 780 { 781 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 782 783 dev->vdev = NULL; 784 } 785 786 static const struct vdpa_config_ops vduse_vdpa_config_ops = { 787 .set_vq_address = vduse_vdpa_set_vq_address, 788 .kick_vq = vduse_vdpa_kick_vq, 789 .set_vq_cb = vduse_vdpa_set_vq_cb, 790 .set_vq_num = vduse_vdpa_set_vq_num, 791 .get_vq_size = vduse_vdpa_get_vq_size, 792 .set_vq_ready = vduse_vdpa_set_vq_ready, 793 .get_vq_ready = vduse_vdpa_get_vq_ready, 794 .set_vq_state = vduse_vdpa_set_vq_state, 795 .get_vq_state = vduse_vdpa_get_vq_state, 796 .get_vq_align = vduse_vdpa_get_vq_align, 797 .get_device_features = vduse_vdpa_get_device_features, 798 .set_driver_features = vduse_vdpa_set_driver_features, 799 .get_driver_features = vduse_vdpa_get_driver_features, 800 .set_config_cb = vduse_vdpa_set_config_cb, 801 .get_vq_num_max = vduse_vdpa_get_vq_num_max, 802 .get_device_id = vduse_vdpa_get_device_id, 803 .get_vendor_id = vduse_vdpa_get_vendor_id, 804 .get_status = vduse_vdpa_get_status, 805 .set_status = vduse_vdpa_set_status, 806 .get_config_size = vduse_vdpa_get_config_size, 807 .get_config = vduse_vdpa_get_config, 808 .set_config = vduse_vdpa_set_config, 809 .get_generation = vduse_vdpa_get_generation, 810 .set_vq_affinity = vduse_vdpa_set_vq_affinity, 811 .get_vq_affinity = vduse_vdpa_get_vq_affinity, 812 .reset = vduse_vdpa_reset, 813 .set_map = vduse_vdpa_set_map, 814 .free = vduse_vdpa_free, 815 }; 816 817 static void vduse_dev_sync_single_for_device(union virtio_map token, 818 dma_addr_t dma_addr, size_t size, 819 enum dma_data_direction dir) 820 { 821 struct vduse_iova_domain *domain = token.iova_domain; 822 823 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); 824 } 825 826 static void vduse_dev_sync_single_for_cpu(union virtio_map token, 827 dma_addr_t dma_addr, size_t size, 828 enum dma_data_direction dir) 829 { 830 struct vduse_iova_domain *domain = token.iova_domain; 831 832 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); 833 } 834 835 static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page, 836 unsigned long offset, size_t size, 837 enum dma_data_direction dir, 838 unsigned long attrs) 839 { 840 struct vduse_iova_domain *domain = token.iova_domain; 841 842 return vduse_domain_map_page(domain, page, offset, size, dir, attrs); 843 } 844 845 static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr, 846 size_t size, enum dma_data_direction dir, 847 unsigned long attrs) 848 { 849 struct vduse_iova_domain *domain = token.iova_domain; 850 851 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); 852 } 853 854 static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size, 855 dma_addr_t *dma_addr, gfp_t flag) 856 { 857 struct vduse_iova_domain *domain = token.iova_domain; 858 unsigned long iova; 859 void *addr; 860 861 *dma_addr = DMA_MAPPING_ERROR; 862 addr = vduse_domain_alloc_coherent(domain, size, 863 (dma_addr_t *)&iova, flag); 864 if (!addr) 865 return NULL; 866 867 *dma_addr = (dma_addr_t)iova; 868 869 return addr; 870 } 871 872 static void vduse_dev_free_coherent(union virtio_map token, size_t size, 873 void *vaddr, dma_addr_t dma_addr, 874 unsigned long attrs) 875 { 876 struct vduse_iova_domain *domain = token.iova_domain; 877 878 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); 879 } 880 881 static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr) 882 { 883 struct vduse_iova_domain *domain = token.iova_domain; 884 885 return dma_addr < domain->bounce_size; 886 } 887 888 static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr) 889 { 890 if (unlikely(dma_addr == DMA_MAPPING_ERROR)) 891 return -ENOMEM; 892 return 0; 893 } 894 895 static size_t vduse_dev_max_mapping_size(union virtio_map token) 896 { 897 struct vduse_iova_domain *domain = token.iova_domain; 898 899 return domain->bounce_size; 900 } 901 902 static const struct virtio_map_ops vduse_map_ops = { 903 .sync_single_for_device = vduse_dev_sync_single_for_device, 904 .sync_single_for_cpu = vduse_dev_sync_single_for_cpu, 905 .map_page = vduse_dev_map_page, 906 .unmap_page = vduse_dev_unmap_page, 907 .alloc = vduse_dev_alloc_coherent, 908 .free = vduse_dev_free_coherent, 909 .need_sync = vduse_dev_need_sync, 910 .mapping_error = vduse_dev_mapping_error, 911 .max_mapping_size = vduse_dev_max_mapping_size, 912 }; 913 914 static unsigned int perm_to_file_flags(u8 perm) 915 { 916 unsigned int flags = 0; 917 918 switch (perm) { 919 case VDUSE_ACCESS_WO: 920 flags |= O_WRONLY; 921 break; 922 case VDUSE_ACCESS_RO: 923 flags |= O_RDONLY; 924 break; 925 case VDUSE_ACCESS_RW: 926 flags |= O_RDWR; 927 break; 928 default: 929 WARN(1, "invalidate vhost IOTLB permission\n"); 930 break; 931 } 932 933 return flags; 934 } 935 936 static int vduse_kickfd_setup(struct vduse_dev *dev, 937 struct vduse_vq_eventfd *eventfd) 938 { 939 struct eventfd_ctx *ctx = NULL; 940 struct vduse_virtqueue *vq; 941 u32 index; 942 943 if (eventfd->index >= dev->vq_num) 944 return -EINVAL; 945 946 index = array_index_nospec(eventfd->index, dev->vq_num); 947 vq = dev->vqs[index]; 948 if (eventfd->fd >= 0) { 949 ctx = eventfd_ctx_fdget(eventfd->fd); 950 if (IS_ERR(ctx)) 951 return PTR_ERR(ctx); 952 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN) 953 return 0; 954 955 spin_lock(&vq->kick_lock); 956 if (vq->kickfd) 957 eventfd_ctx_put(vq->kickfd); 958 vq->kickfd = ctx; 959 if (vq->ready && vq->kicked && vq->kickfd) { 960 eventfd_signal(vq->kickfd); 961 vq->kicked = false; 962 } 963 spin_unlock(&vq->kick_lock); 964 965 return 0; 966 } 967 968 static bool vduse_dev_is_ready(struct vduse_dev *dev) 969 { 970 int i; 971 972 for (i = 0; i < dev->vq_num; i++) 973 if (!dev->vqs[i]->num_max) 974 return false; 975 976 return true; 977 } 978 979 static void vduse_dev_irq_inject(struct work_struct *work) 980 { 981 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); 982 983 spin_lock_bh(&dev->irq_lock); 984 if (dev->config_cb.callback) 985 dev->config_cb.callback(dev->config_cb.private); 986 spin_unlock_bh(&dev->irq_lock); 987 } 988 989 static void vduse_vq_irq_inject(struct work_struct *work) 990 { 991 struct vduse_virtqueue *vq = container_of(work, 992 struct vduse_virtqueue, inject); 993 994 spin_lock_bh(&vq->irq_lock); 995 if (vq->ready && vq->cb.callback) 996 vq->cb.callback(vq->cb.private); 997 spin_unlock_bh(&vq->irq_lock); 998 } 999 1000 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq) 1001 { 1002 bool signal = false; 1003 1004 if (!vq->cb.trigger) 1005 return false; 1006 1007 spin_lock_irq(&vq->irq_lock); 1008 if (vq->ready && vq->cb.trigger) { 1009 eventfd_signal(vq->cb.trigger); 1010 signal = true; 1011 } 1012 spin_unlock_irq(&vq->irq_lock); 1013 1014 return signal; 1015 } 1016 1017 static int vduse_dev_queue_irq_work(struct vduse_dev *dev, 1018 struct work_struct *irq_work, 1019 int irq_effective_cpu) 1020 { 1021 int ret = -EINVAL; 1022 1023 down_read(&dev->rwsem); 1024 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1025 goto unlock; 1026 1027 ret = 0; 1028 if (irq_effective_cpu == IRQ_UNBOUND) 1029 queue_work(vduse_irq_wq, irq_work); 1030 else 1031 queue_work_on(irq_effective_cpu, 1032 vduse_irq_bound_wq, irq_work); 1033 unlock: 1034 up_read(&dev->rwsem); 1035 1036 return ret; 1037 } 1038 1039 static int vduse_dev_dereg_umem(struct vduse_dev *dev, 1040 u64 iova, u64 size) 1041 { 1042 int ret; 1043 1044 mutex_lock(&dev->mem_lock); 1045 ret = -ENOENT; 1046 if (!dev->umem) 1047 goto unlock; 1048 1049 ret = -EINVAL; 1050 if (!dev->domain) 1051 goto unlock; 1052 1053 if (dev->umem->iova != iova || size != dev->domain->bounce_size) 1054 goto unlock; 1055 1056 vduse_domain_remove_user_bounce_pages(dev->domain); 1057 unpin_user_pages_dirty_lock(dev->umem->pages, 1058 dev->umem->npages, true); 1059 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); 1060 mmdrop(dev->umem->mm); 1061 vfree(dev->umem->pages); 1062 kfree(dev->umem); 1063 dev->umem = NULL; 1064 ret = 0; 1065 unlock: 1066 mutex_unlock(&dev->mem_lock); 1067 return ret; 1068 } 1069 1070 static int vduse_dev_reg_umem(struct vduse_dev *dev, 1071 u64 iova, u64 uaddr, u64 size) 1072 { 1073 struct page **page_list = NULL; 1074 struct vduse_umem *umem = NULL; 1075 long pinned = 0; 1076 unsigned long npages, lock_limit; 1077 int ret; 1078 1079 if (!dev->domain || !dev->domain->bounce_map || 1080 size != dev->domain->bounce_size || 1081 iova != 0 || uaddr & ~PAGE_MASK) 1082 return -EINVAL; 1083 1084 mutex_lock(&dev->mem_lock); 1085 ret = -EEXIST; 1086 if (dev->umem) 1087 goto unlock; 1088 1089 ret = -ENOMEM; 1090 npages = size >> PAGE_SHIFT; 1091 page_list = __vmalloc(array_size(npages, sizeof(struct page *)), 1092 GFP_KERNEL_ACCOUNT); 1093 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 1094 if (!page_list || !umem) 1095 goto unlock; 1096 1097 mmap_read_lock(current->mm); 1098 1099 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); 1100 if (npages + atomic64_read(¤t->mm->pinned_vm) > lock_limit) 1101 goto out; 1102 1103 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE, 1104 page_list); 1105 if (pinned != npages) { 1106 ret = pinned < 0 ? pinned : -ENOMEM; 1107 goto out; 1108 } 1109 1110 ret = vduse_domain_add_user_bounce_pages(dev->domain, 1111 page_list, pinned); 1112 if (ret) 1113 goto out; 1114 1115 atomic64_add(npages, ¤t->mm->pinned_vm); 1116 1117 umem->pages = page_list; 1118 umem->npages = pinned; 1119 umem->iova = iova; 1120 umem->mm = current->mm; 1121 mmgrab(current->mm); 1122 1123 dev->umem = umem; 1124 out: 1125 if (ret && pinned > 0) 1126 unpin_user_pages(page_list, pinned); 1127 1128 mmap_read_unlock(current->mm); 1129 unlock: 1130 if (ret) { 1131 vfree(page_list); 1132 kfree(umem); 1133 } 1134 mutex_unlock(&dev->mem_lock); 1135 return ret; 1136 } 1137 1138 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq) 1139 { 1140 int curr_cpu = vq->irq_effective_cpu; 1141 1142 while (true) { 1143 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity); 1144 if (cpu_online(curr_cpu)) 1145 break; 1146 1147 if (curr_cpu >= nr_cpu_ids) 1148 curr_cpu = IRQ_UNBOUND; 1149 } 1150 1151 vq->irq_effective_cpu = curr_cpu; 1152 } 1153 1154 static long vduse_dev_ioctl(struct file *file, unsigned int cmd, 1155 unsigned long arg) 1156 { 1157 struct vduse_dev *dev = file->private_data; 1158 void __user *argp = (void __user *)arg; 1159 int ret; 1160 1161 if (unlikely(dev->broken)) 1162 return -EPERM; 1163 1164 switch (cmd) { 1165 case VDUSE_IOTLB_GET_FD: { 1166 struct vduse_iotlb_entry entry; 1167 struct vhost_iotlb_map *map; 1168 struct vdpa_map_file *map_file; 1169 struct file *f = NULL; 1170 1171 ret = -EFAULT; 1172 if (copy_from_user(&entry, argp, sizeof(entry))) 1173 break; 1174 1175 ret = -EINVAL; 1176 if (entry.start > entry.last) 1177 break; 1178 1179 mutex_lock(&dev->domain_lock); 1180 if (!dev->domain) { 1181 mutex_unlock(&dev->domain_lock); 1182 break; 1183 } 1184 spin_lock(&dev->domain->iotlb_lock); 1185 map = vhost_iotlb_itree_first(dev->domain->iotlb, 1186 entry.start, entry.last); 1187 if (map) { 1188 map_file = (struct vdpa_map_file *)map->opaque; 1189 f = get_file(map_file->file); 1190 entry.offset = map_file->offset; 1191 entry.start = map->start; 1192 entry.last = map->last; 1193 entry.perm = map->perm; 1194 } 1195 spin_unlock(&dev->domain->iotlb_lock); 1196 mutex_unlock(&dev->domain_lock); 1197 ret = -EINVAL; 1198 if (!f) 1199 break; 1200 1201 ret = -EFAULT; 1202 if (copy_to_user(argp, &entry, sizeof(entry))) { 1203 fput(f); 1204 break; 1205 } 1206 ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm)); 1207 fput(f); 1208 break; 1209 } 1210 case VDUSE_DEV_GET_FEATURES: 1211 /* 1212 * Just mirror what driver wrote here. 1213 * The driver is expected to check FEATURE_OK later. 1214 */ 1215 ret = put_user(dev->driver_features, (u64 __user *)argp); 1216 break; 1217 case VDUSE_DEV_SET_CONFIG: { 1218 struct vduse_config_data config; 1219 unsigned long size = offsetof(struct vduse_config_data, 1220 buffer); 1221 1222 ret = -EFAULT; 1223 if (copy_from_user(&config, argp, size)) 1224 break; 1225 1226 ret = -EINVAL; 1227 if (config.offset > dev->config_size || 1228 config.length == 0 || 1229 config.length > dev->config_size - config.offset) 1230 break; 1231 1232 ret = -EFAULT; 1233 if (copy_from_user(dev->config + config.offset, argp + size, 1234 config.length)) 1235 break; 1236 1237 ret = 0; 1238 break; 1239 } 1240 case VDUSE_DEV_INJECT_CONFIG_IRQ: 1241 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND); 1242 break; 1243 case VDUSE_VQ_SETUP: { 1244 struct vduse_vq_config config; 1245 u32 index; 1246 1247 ret = -EFAULT; 1248 if (copy_from_user(&config, argp, sizeof(config))) 1249 break; 1250 1251 ret = -EINVAL; 1252 if (config.index >= dev->vq_num) 1253 break; 1254 1255 if (!is_mem_zero((const char *)config.reserved, 1256 sizeof(config.reserved))) 1257 break; 1258 1259 index = array_index_nospec(config.index, dev->vq_num); 1260 dev->vqs[index]->num_max = config.max_size; 1261 ret = 0; 1262 break; 1263 } 1264 case VDUSE_VQ_GET_INFO: { 1265 struct vduse_vq_info vq_info; 1266 struct vduse_virtqueue *vq; 1267 u32 index; 1268 1269 ret = -EFAULT; 1270 if (copy_from_user(&vq_info, argp, sizeof(vq_info))) 1271 break; 1272 1273 ret = -EINVAL; 1274 if (vq_info.index >= dev->vq_num) 1275 break; 1276 1277 index = array_index_nospec(vq_info.index, dev->vq_num); 1278 vq = dev->vqs[index]; 1279 vq_info.desc_addr = vq->desc_addr; 1280 vq_info.driver_addr = vq->driver_addr; 1281 vq_info.device_addr = vq->device_addr; 1282 vq_info.num = vq->num; 1283 1284 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { 1285 vq_info.packed.last_avail_counter = 1286 vq->state.packed.last_avail_counter; 1287 vq_info.packed.last_avail_idx = 1288 vq->state.packed.last_avail_idx; 1289 vq_info.packed.last_used_counter = 1290 vq->state.packed.last_used_counter; 1291 vq_info.packed.last_used_idx = 1292 vq->state.packed.last_used_idx; 1293 } else 1294 vq_info.split.avail_index = 1295 vq->state.split.avail_index; 1296 1297 vq_info.ready = vq->ready; 1298 1299 ret = -EFAULT; 1300 if (copy_to_user(argp, &vq_info, sizeof(vq_info))) 1301 break; 1302 1303 ret = 0; 1304 break; 1305 } 1306 case VDUSE_VQ_SETUP_KICKFD: { 1307 struct vduse_vq_eventfd eventfd; 1308 1309 ret = -EFAULT; 1310 if (copy_from_user(&eventfd, argp, sizeof(eventfd))) 1311 break; 1312 1313 ret = vduse_kickfd_setup(dev, &eventfd); 1314 break; 1315 } 1316 case VDUSE_VQ_INJECT_IRQ: { 1317 u32 index; 1318 1319 ret = -EFAULT; 1320 if (get_user(index, (u32 __user *)argp)) 1321 break; 1322 1323 ret = -EINVAL; 1324 if (index >= dev->vq_num) 1325 break; 1326 1327 ret = 0; 1328 index = array_index_nospec(index, dev->vq_num); 1329 if (!vduse_vq_signal_irqfd(dev->vqs[index])) { 1330 vduse_vq_update_effective_cpu(dev->vqs[index]); 1331 ret = vduse_dev_queue_irq_work(dev, 1332 &dev->vqs[index]->inject, 1333 dev->vqs[index]->irq_effective_cpu); 1334 } 1335 break; 1336 } 1337 case VDUSE_IOTLB_REG_UMEM: { 1338 struct vduse_iova_umem umem; 1339 1340 ret = -EFAULT; 1341 if (copy_from_user(&umem, argp, sizeof(umem))) 1342 break; 1343 1344 ret = -EINVAL; 1345 if (!is_mem_zero((const char *)umem.reserved, 1346 sizeof(umem.reserved))) 1347 break; 1348 1349 mutex_lock(&dev->domain_lock); 1350 ret = vduse_dev_reg_umem(dev, umem.iova, 1351 umem.uaddr, umem.size); 1352 mutex_unlock(&dev->domain_lock); 1353 break; 1354 } 1355 case VDUSE_IOTLB_DEREG_UMEM: { 1356 struct vduse_iova_umem umem; 1357 1358 ret = -EFAULT; 1359 if (copy_from_user(&umem, argp, sizeof(umem))) 1360 break; 1361 1362 ret = -EINVAL; 1363 if (!is_mem_zero((const char *)umem.reserved, 1364 sizeof(umem.reserved))) 1365 break; 1366 mutex_lock(&dev->domain_lock); 1367 ret = vduse_dev_dereg_umem(dev, umem.iova, 1368 umem.size); 1369 mutex_unlock(&dev->domain_lock); 1370 break; 1371 } 1372 case VDUSE_IOTLB_GET_INFO: { 1373 struct vduse_iova_info info; 1374 struct vhost_iotlb_map *map; 1375 1376 ret = -EFAULT; 1377 if (copy_from_user(&info, argp, sizeof(info))) 1378 break; 1379 1380 ret = -EINVAL; 1381 if (info.start > info.last) 1382 break; 1383 1384 if (!is_mem_zero((const char *)info.reserved, 1385 sizeof(info.reserved))) 1386 break; 1387 1388 mutex_lock(&dev->domain_lock); 1389 if (!dev->domain) { 1390 mutex_unlock(&dev->domain_lock); 1391 break; 1392 } 1393 spin_lock(&dev->domain->iotlb_lock); 1394 map = vhost_iotlb_itree_first(dev->domain->iotlb, 1395 info.start, info.last); 1396 if (map) { 1397 info.start = map->start; 1398 info.last = map->last; 1399 info.capability = 0; 1400 if (dev->domain->bounce_map && map->start == 0 && 1401 map->last == dev->domain->bounce_size - 1) 1402 info.capability |= VDUSE_IOVA_CAP_UMEM; 1403 } 1404 spin_unlock(&dev->domain->iotlb_lock); 1405 mutex_unlock(&dev->domain_lock); 1406 if (!map) 1407 break; 1408 1409 ret = -EFAULT; 1410 if (copy_to_user(argp, &info, sizeof(info))) 1411 break; 1412 1413 ret = 0; 1414 break; 1415 } 1416 default: 1417 ret = -ENOIOCTLCMD; 1418 break; 1419 } 1420 1421 return ret; 1422 } 1423 1424 static int vduse_dev_release(struct inode *inode, struct file *file) 1425 { 1426 struct vduse_dev *dev = file->private_data; 1427 1428 mutex_lock(&dev->domain_lock); 1429 if (dev->domain) 1430 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); 1431 mutex_unlock(&dev->domain_lock); 1432 spin_lock(&dev->msg_lock); 1433 /* Make sure the inflight messages can processed after reconncection */ 1434 list_splice_init(&dev->recv_list, &dev->send_list); 1435 spin_unlock(&dev->msg_lock); 1436 dev->connected = false; 1437 1438 return 0; 1439 } 1440 1441 static struct vduse_dev *vduse_dev_get_from_minor(int minor) 1442 { 1443 struct vduse_dev *dev; 1444 1445 mutex_lock(&vduse_lock); 1446 dev = idr_find(&vduse_idr, minor); 1447 mutex_unlock(&vduse_lock); 1448 1449 return dev; 1450 } 1451 1452 static int vduse_dev_open(struct inode *inode, struct file *file) 1453 { 1454 int ret; 1455 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode)); 1456 1457 if (!dev) 1458 return -ENODEV; 1459 1460 ret = -EBUSY; 1461 mutex_lock(&dev->lock); 1462 if (dev->connected) 1463 goto unlock; 1464 1465 ret = 0; 1466 dev->connected = true; 1467 file->private_data = dev; 1468 unlock: 1469 mutex_unlock(&dev->lock); 1470 1471 return ret; 1472 } 1473 1474 static const struct file_operations vduse_dev_fops = { 1475 .owner = THIS_MODULE, 1476 .open = vduse_dev_open, 1477 .release = vduse_dev_release, 1478 .read_iter = vduse_dev_read_iter, 1479 .write_iter = vduse_dev_write_iter, 1480 .poll = vduse_dev_poll, 1481 .unlocked_ioctl = vduse_dev_ioctl, 1482 .compat_ioctl = compat_ptr_ioctl, 1483 .llseek = noop_llseek, 1484 }; 1485 1486 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf) 1487 { 1488 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity)); 1489 } 1490 1491 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq, 1492 const char *buf, size_t count) 1493 { 1494 cpumask_var_t new_value; 1495 int ret; 1496 1497 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) 1498 return -ENOMEM; 1499 1500 ret = cpumask_parse(buf, new_value); 1501 if (ret) 1502 goto free_mask; 1503 1504 ret = -EINVAL; 1505 if (!cpumask_intersects(new_value, cpu_online_mask)) 1506 goto free_mask; 1507 1508 cpumask_copy(&vq->irq_affinity, new_value); 1509 ret = count; 1510 free_mask: 1511 free_cpumask_var(new_value); 1512 return ret; 1513 } 1514 1515 struct vq_sysfs_entry { 1516 struct attribute attr; 1517 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf); 1518 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf, 1519 size_t count); 1520 }; 1521 1522 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity); 1523 1524 static struct attribute *vq_attrs[] = { 1525 &irq_cb_affinity_attr.attr, 1526 NULL, 1527 }; 1528 ATTRIBUTE_GROUPS(vq); 1529 1530 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr, 1531 char *buf) 1532 { 1533 struct vduse_virtqueue *vq = container_of(kobj, 1534 struct vduse_virtqueue, kobj); 1535 struct vq_sysfs_entry *entry = container_of(attr, 1536 struct vq_sysfs_entry, attr); 1537 1538 if (!entry->show) 1539 return -EIO; 1540 1541 return entry->show(vq, buf); 1542 } 1543 1544 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr, 1545 const char *buf, size_t count) 1546 { 1547 struct vduse_virtqueue *vq = container_of(kobj, 1548 struct vduse_virtqueue, kobj); 1549 struct vq_sysfs_entry *entry = container_of(attr, 1550 struct vq_sysfs_entry, attr); 1551 1552 if (!entry->store) 1553 return -EIO; 1554 1555 return entry->store(vq, buf, count); 1556 } 1557 1558 static const struct sysfs_ops vq_sysfs_ops = { 1559 .show = vq_attr_show, 1560 .store = vq_attr_store, 1561 }; 1562 1563 static void vq_release(struct kobject *kobj) 1564 { 1565 struct vduse_virtqueue *vq = container_of(kobj, 1566 struct vduse_virtqueue, kobj); 1567 kfree(vq); 1568 } 1569 1570 static const struct kobj_type vq_type = { 1571 .release = vq_release, 1572 .sysfs_ops = &vq_sysfs_ops, 1573 .default_groups = vq_groups, 1574 }; 1575 1576 static char *vduse_devnode(const struct device *dev, umode_t *mode) 1577 { 1578 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); 1579 } 1580 1581 static const struct class vduse_class = { 1582 .name = "vduse", 1583 .devnode = vduse_devnode, 1584 }; 1585 1586 static void vduse_dev_deinit_vqs(struct vduse_dev *dev) 1587 { 1588 int i; 1589 1590 if (!dev->vqs) 1591 return; 1592 1593 for (i = 0; i < dev->vq_num; i++) 1594 kobject_put(&dev->vqs[i]->kobj); 1595 kfree(dev->vqs); 1596 } 1597 1598 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) 1599 { 1600 int ret, i; 1601 1602 dev->vq_align = vq_align; 1603 dev->vq_num = vq_num; 1604 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); 1605 if (!dev->vqs) 1606 return -ENOMEM; 1607 1608 for (i = 0; i < vq_num; i++) { 1609 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL); 1610 if (!dev->vqs[i]) { 1611 ret = -ENOMEM; 1612 goto err; 1613 } 1614 1615 dev->vqs[i]->index = i; 1616 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND; 1617 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject); 1618 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work); 1619 spin_lock_init(&dev->vqs[i]->kick_lock); 1620 spin_lock_init(&dev->vqs[i]->irq_lock); 1621 cpumask_setall(&dev->vqs[i]->irq_affinity); 1622 1623 kobject_init(&dev->vqs[i]->kobj, &vq_type); 1624 ret = kobject_add(&dev->vqs[i]->kobj, 1625 &dev->dev->kobj, "vq%d", i); 1626 if (ret) { 1627 kfree(dev->vqs[i]); 1628 goto err; 1629 } 1630 } 1631 1632 return 0; 1633 err: 1634 while (i--) 1635 kobject_put(&dev->vqs[i]->kobj); 1636 kfree(dev->vqs); 1637 dev->vqs = NULL; 1638 return ret; 1639 } 1640 1641 static struct vduse_dev *vduse_dev_create(void) 1642 { 1643 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); 1644 1645 if (!dev) 1646 return NULL; 1647 1648 mutex_init(&dev->lock); 1649 mutex_init(&dev->mem_lock); 1650 mutex_init(&dev->domain_lock); 1651 spin_lock_init(&dev->msg_lock); 1652 INIT_LIST_HEAD(&dev->send_list); 1653 INIT_LIST_HEAD(&dev->recv_list); 1654 spin_lock_init(&dev->irq_lock); 1655 init_rwsem(&dev->rwsem); 1656 1657 INIT_WORK(&dev->inject, vduse_dev_irq_inject); 1658 init_waitqueue_head(&dev->waitq); 1659 1660 return dev; 1661 } 1662 1663 static void vduse_dev_destroy(struct vduse_dev *dev) 1664 { 1665 kfree(dev); 1666 } 1667 1668 static struct vduse_dev *vduse_find_dev(const char *name) 1669 { 1670 struct vduse_dev *dev; 1671 int id; 1672 1673 idr_for_each_entry(&vduse_idr, dev, id) 1674 if (!strcmp(dev->name, name)) 1675 return dev; 1676 1677 return NULL; 1678 } 1679 1680 static int vduse_destroy_dev(char *name) 1681 { 1682 struct vduse_dev *dev = vduse_find_dev(name); 1683 1684 if (!dev) 1685 return -EINVAL; 1686 1687 mutex_lock(&dev->lock); 1688 if (dev->vdev || dev->connected) { 1689 mutex_unlock(&dev->lock); 1690 return -EBUSY; 1691 } 1692 dev->connected = true; 1693 mutex_unlock(&dev->lock); 1694 1695 vduse_dev_reset(dev); 1696 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); 1697 idr_remove(&vduse_idr, dev->minor); 1698 kvfree(dev->config); 1699 vduse_dev_deinit_vqs(dev); 1700 if (dev->domain) 1701 vduse_domain_destroy(dev->domain); 1702 kfree(dev->name); 1703 vduse_dev_destroy(dev); 1704 module_put(THIS_MODULE); 1705 1706 return 0; 1707 } 1708 1709 static bool device_is_allowed(u32 device_id) 1710 { 1711 int i; 1712 1713 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++) 1714 if (allowed_device_id[i] == device_id) 1715 return true; 1716 1717 return false; 1718 } 1719 1720 static bool features_is_valid(struct vduse_dev_config *config) 1721 { 1722 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 1723 return false; 1724 1725 /* Now we only support read-only configuration space */ 1726 if ((config->device_id == VIRTIO_ID_BLOCK) && 1727 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE))) 1728 return false; 1729 else if ((config->device_id == VIRTIO_ID_NET) && 1730 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 1731 return false; 1732 1733 if ((config->device_id == VIRTIO_ID_NET) && 1734 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1))) 1735 return false; 1736 1737 return true; 1738 } 1739 1740 static bool vduse_validate_config(struct vduse_dev_config *config) 1741 { 1742 if (!is_mem_zero((const char *)config->reserved, 1743 sizeof(config->reserved))) 1744 return false; 1745 1746 if (config->vq_align > PAGE_SIZE) 1747 return false; 1748 1749 if (config->config_size > PAGE_SIZE) 1750 return false; 1751 1752 if (config->vq_num > 0xffff) 1753 return false; 1754 1755 if (!config->name[0]) 1756 return false; 1757 1758 if (!device_is_allowed(config->device_id)) 1759 return false; 1760 1761 if (!features_is_valid(config)) 1762 return false; 1763 1764 return true; 1765 } 1766 1767 static ssize_t msg_timeout_show(struct device *device, 1768 struct device_attribute *attr, char *buf) 1769 { 1770 struct vduse_dev *dev = dev_get_drvdata(device); 1771 1772 return sysfs_emit(buf, "%u\n", dev->msg_timeout); 1773 } 1774 1775 static ssize_t msg_timeout_store(struct device *device, 1776 struct device_attribute *attr, 1777 const char *buf, size_t count) 1778 { 1779 struct vduse_dev *dev = dev_get_drvdata(device); 1780 int ret; 1781 1782 ret = kstrtouint(buf, 10, &dev->msg_timeout); 1783 if (ret < 0) 1784 return ret; 1785 1786 return count; 1787 } 1788 1789 static DEVICE_ATTR_RW(msg_timeout); 1790 1791 static ssize_t bounce_size_show(struct device *device, 1792 struct device_attribute *attr, char *buf) 1793 { 1794 struct vduse_dev *dev = dev_get_drvdata(device); 1795 1796 return sysfs_emit(buf, "%u\n", dev->bounce_size); 1797 } 1798 1799 static ssize_t bounce_size_store(struct device *device, 1800 struct device_attribute *attr, 1801 const char *buf, size_t count) 1802 { 1803 struct vduse_dev *dev = dev_get_drvdata(device); 1804 unsigned int bounce_size; 1805 int ret; 1806 1807 ret = -EPERM; 1808 mutex_lock(&dev->domain_lock); 1809 if (dev->domain) 1810 goto unlock; 1811 1812 ret = kstrtouint(buf, 10, &bounce_size); 1813 if (ret < 0) 1814 goto unlock; 1815 1816 ret = -EINVAL; 1817 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE || 1818 bounce_size < VDUSE_MIN_BOUNCE_SIZE) 1819 goto unlock; 1820 1821 dev->bounce_size = bounce_size & PAGE_MASK; 1822 ret = count; 1823 unlock: 1824 mutex_unlock(&dev->domain_lock); 1825 return ret; 1826 } 1827 1828 static DEVICE_ATTR_RW(bounce_size); 1829 1830 static struct attribute *vduse_dev_attrs[] = { 1831 &dev_attr_msg_timeout.attr, 1832 &dev_attr_bounce_size.attr, 1833 NULL 1834 }; 1835 1836 ATTRIBUTE_GROUPS(vduse_dev); 1837 1838 static int vduse_create_dev(struct vduse_dev_config *config, 1839 void *config_buf, u64 api_version) 1840 { 1841 int ret; 1842 struct vduse_dev *dev; 1843 1844 ret = -EPERM; 1845 if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN)) 1846 goto err; 1847 1848 ret = -EEXIST; 1849 if (vduse_find_dev(config->name)) 1850 goto err; 1851 1852 ret = -ENOMEM; 1853 dev = vduse_dev_create(); 1854 if (!dev) 1855 goto err; 1856 1857 dev->api_version = api_version; 1858 dev->device_features = config->features; 1859 dev->device_id = config->device_id; 1860 dev->vendor_id = config->vendor_id; 1861 dev->name = kstrdup(config->name, GFP_KERNEL); 1862 if (!dev->name) 1863 goto err_str; 1864 1865 dev->bounce_size = VDUSE_BOUNCE_SIZE; 1866 dev->config = config_buf; 1867 dev->config_size = config->config_size; 1868 1869 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); 1870 if (ret < 0) 1871 goto err_idr; 1872 1873 dev->minor = ret; 1874 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; 1875 dev->dev = device_create_with_groups(&vduse_class, NULL, 1876 MKDEV(MAJOR(vduse_major), dev->minor), 1877 dev, vduse_dev_groups, "%s", config->name); 1878 if (IS_ERR(dev->dev)) { 1879 ret = PTR_ERR(dev->dev); 1880 goto err_dev; 1881 } 1882 1883 ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num); 1884 if (ret) 1885 goto err_vqs; 1886 1887 __module_get(THIS_MODULE); 1888 1889 return 0; 1890 err_vqs: 1891 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); 1892 err_dev: 1893 idr_remove(&vduse_idr, dev->minor); 1894 err_idr: 1895 kfree(dev->name); 1896 err_str: 1897 vduse_dev_destroy(dev); 1898 err: 1899 return ret; 1900 } 1901 1902 static long vduse_ioctl(struct file *file, unsigned int cmd, 1903 unsigned long arg) 1904 { 1905 int ret; 1906 void __user *argp = (void __user *)arg; 1907 struct vduse_control *control = file->private_data; 1908 1909 mutex_lock(&vduse_lock); 1910 switch (cmd) { 1911 case VDUSE_GET_API_VERSION: 1912 ret = put_user(control->api_version, (u64 __user *)argp); 1913 break; 1914 case VDUSE_SET_API_VERSION: { 1915 u64 api_version; 1916 1917 ret = -EFAULT; 1918 if (get_user(api_version, (u64 __user *)argp)) 1919 break; 1920 1921 ret = -EINVAL; 1922 if (api_version > VDUSE_API_VERSION) 1923 break; 1924 1925 ret = 0; 1926 control->api_version = api_version; 1927 break; 1928 } 1929 case VDUSE_CREATE_DEV: { 1930 struct vduse_dev_config config; 1931 unsigned long size = offsetof(struct vduse_dev_config, config); 1932 void *buf; 1933 1934 ret = -EFAULT; 1935 if (copy_from_user(&config, argp, size)) 1936 break; 1937 1938 ret = -EINVAL; 1939 if (vduse_validate_config(&config) == false) 1940 break; 1941 1942 buf = vmemdup_user(argp + size, config.config_size); 1943 if (IS_ERR(buf)) { 1944 ret = PTR_ERR(buf); 1945 break; 1946 } 1947 config.name[VDUSE_NAME_MAX - 1] = '\0'; 1948 ret = vduse_create_dev(&config, buf, control->api_version); 1949 if (ret) 1950 kvfree(buf); 1951 break; 1952 } 1953 case VDUSE_DESTROY_DEV: { 1954 char name[VDUSE_NAME_MAX]; 1955 1956 ret = -EFAULT; 1957 if (copy_from_user(name, argp, VDUSE_NAME_MAX)) 1958 break; 1959 1960 name[VDUSE_NAME_MAX - 1] = '\0'; 1961 ret = vduse_destroy_dev(name); 1962 break; 1963 } 1964 default: 1965 ret = -EINVAL; 1966 break; 1967 } 1968 mutex_unlock(&vduse_lock); 1969 1970 return ret; 1971 } 1972 1973 static int vduse_release(struct inode *inode, struct file *file) 1974 { 1975 struct vduse_control *control = file->private_data; 1976 1977 kfree(control); 1978 return 0; 1979 } 1980 1981 static int vduse_open(struct inode *inode, struct file *file) 1982 { 1983 struct vduse_control *control; 1984 1985 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL); 1986 if (!control) 1987 return -ENOMEM; 1988 1989 control->api_version = VDUSE_API_VERSION; 1990 file->private_data = control; 1991 1992 return 0; 1993 } 1994 1995 static const struct file_operations vduse_ctrl_fops = { 1996 .owner = THIS_MODULE, 1997 .open = vduse_open, 1998 .release = vduse_release, 1999 .unlocked_ioctl = vduse_ioctl, 2000 .compat_ioctl = compat_ptr_ioctl, 2001 .llseek = noop_llseek, 2002 }; 2003 2004 struct vduse_mgmt_dev { 2005 struct vdpa_mgmt_dev mgmt_dev; 2006 struct device dev; 2007 }; 2008 2009 static struct vduse_mgmt_dev *vduse_mgmt; 2010 2011 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) 2012 { 2013 struct vduse_vdpa *vdev; 2014 2015 if (dev->vdev) 2016 return -EEXIST; 2017 2018 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, 2019 &vduse_vdpa_config_ops, &vduse_map_ops, 2020 1, 1, name, true); 2021 if (IS_ERR(vdev)) 2022 return PTR_ERR(vdev); 2023 2024 dev->vdev = vdev; 2025 vdev->dev = dev; 2026 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; 2027 2028 return 0; 2029 } 2030 2031 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 2032 const struct vdpa_dev_set_config *config) 2033 { 2034 struct vduse_dev *dev; 2035 int ret; 2036 2037 mutex_lock(&vduse_lock); 2038 dev = vduse_find_dev(name); 2039 if (!dev || !vduse_dev_is_ready(dev)) { 2040 mutex_unlock(&vduse_lock); 2041 return -EINVAL; 2042 } 2043 ret = vduse_dev_init_vdpa(dev, name); 2044 mutex_unlock(&vduse_lock); 2045 if (ret) 2046 return ret; 2047 2048 mutex_lock(&dev->domain_lock); 2049 if (!dev->domain) 2050 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, 2051 dev->bounce_size); 2052 mutex_unlock(&dev->domain_lock); 2053 if (!dev->domain) { 2054 put_device(&dev->vdev->vdpa.dev); 2055 return -ENOMEM; 2056 } 2057 2058 dev->vdev->vdpa.vmap.iova_domain = dev->domain; 2059 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); 2060 if (ret) { 2061 put_device(&dev->vdev->vdpa.dev); 2062 mutex_lock(&dev->domain_lock); 2063 vduse_domain_destroy(dev->domain); 2064 dev->domain = NULL; 2065 mutex_unlock(&dev->domain_lock); 2066 return ret; 2067 } 2068 2069 return 0; 2070 } 2071 2072 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) 2073 { 2074 _vdpa_unregister_device(dev); 2075 } 2076 2077 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = { 2078 .dev_add = vdpa_dev_add, 2079 .dev_del = vdpa_dev_del, 2080 }; 2081 2082 static struct virtio_device_id id_table[] = { 2083 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 2084 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 2085 { 0 }, 2086 }; 2087 2088 static void vduse_mgmtdev_release(struct device *dev) 2089 { 2090 struct vduse_mgmt_dev *mgmt_dev; 2091 2092 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); 2093 kfree(mgmt_dev); 2094 } 2095 2096 static int vduse_mgmtdev_init(void) 2097 { 2098 int ret; 2099 2100 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); 2101 if (!vduse_mgmt) 2102 return -ENOMEM; 2103 2104 ret = dev_set_name(&vduse_mgmt->dev, "vduse"); 2105 if (ret) { 2106 kfree(vduse_mgmt); 2107 return ret; 2108 } 2109 2110 vduse_mgmt->dev.release = vduse_mgmtdev_release; 2111 2112 ret = device_register(&vduse_mgmt->dev); 2113 if (ret) 2114 goto dev_reg_err; 2115 2116 vduse_mgmt->mgmt_dev.id_table = id_table; 2117 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; 2118 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; 2119 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); 2120 if (ret) 2121 device_unregister(&vduse_mgmt->dev); 2122 2123 return ret; 2124 2125 dev_reg_err: 2126 put_device(&vduse_mgmt->dev); 2127 return ret; 2128 } 2129 2130 static void vduse_mgmtdev_exit(void) 2131 { 2132 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); 2133 device_unregister(&vduse_mgmt->dev); 2134 } 2135 2136 static int vduse_init(void) 2137 { 2138 int ret; 2139 struct device *dev; 2140 2141 ret = class_register(&vduse_class); 2142 if (ret) 2143 return ret; 2144 2145 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); 2146 if (ret) 2147 goto err_chardev_region; 2148 2149 /* /dev/vduse/control */ 2150 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops); 2151 vduse_ctrl_cdev.owner = THIS_MODULE; 2152 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1); 2153 if (ret) 2154 goto err_ctrl_cdev; 2155 2156 dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control"); 2157 if (IS_ERR(dev)) { 2158 ret = PTR_ERR(dev); 2159 goto err_device; 2160 } 2161 2162 /* /dev/vduse/$DEVICE */ 2163 cdev_init(&vduse_cdev, &vduse_dev_fops); 2164 vduse_cdev.owner = THIS_MODULE; 2165 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1), 2166 VDUSE_DEV_MAX - 1); 2167 if (ret) 2168 goto err_cdev; 2169 2170 ret = -ENOMEM; 2171 vduse_irq_wq = alloc_workqueue("vduse-irq", 2172 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0); 2173 if (!vduse_irq_wq) 2174 goto err_wq; 2175 2176 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0); 2177 if (!vduse_irq_bound_wq) 2178 goto err_bound_wq; 2179 2180 ret = vduse_domain_init(); 2181 if (ret) 2182 goto err_domain; 2183 2184 ret = vduse_mgmtdev_init(); 2185 if (ret) 2186 goto err_mgmtdev; 2187 2188 return 0; 2189 err_mgmtdev: 2190 vduse_domain_exit(); 2191 err_domain: 2192 destroy_workqueue(vduse_irq_bound_wq); 2193 err_bound_wq: 2194 destroy_workqueue(vduse_irq_wq); 2195 err_wq: 2196 cdev_del(&vduse_cdev); 2197 err_cdev: 2198 device_destroy(&vduse_class, vduse_major); 2199 err_device: 2200 cdev_del(&vduse_ctrl_cdev); 2201 err_ctrl_cdev: 2202 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); 2203 err_chardev_region: 2204 class_unregister(&vduse_class); 2205 return ret; 2206 } 2207 module_init(vduse_init); 2208 2209 static void vduse_exit(void) 2210 { 2211 vduse_mgmtdev_exit(); 2212 vduse_domain_exit(); 2213 destroy_workqueue(vduse_irq_bound_wq); 2214 destroy_workqueue(vduse_irq_wq); 2215 cdev_del(&vduse_cdev); 2216 device_destroy(&vduse_class, vduse_major); 2217 cdev_del(&vduse_ctrl_cdev); 2218 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); 2219 class_unregister(&vduse_class); 2220 idr_destroy(&vduse_idr); 2221 } 2222 module_exit(vduse_exit); 2223 2224 MODULE_LICENSE(DRV_LICENSE); 2225 MODULE_AUTHOR(DRV_AUTHOR); 2226 MODULE_DESCRIPTION(DRV_DESC); 2227