// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/virtio_net.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_ring.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
	int irq_effective_cpu;
	struct cpumask irq_affinity;
	struct kobject kobj;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue **vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
	unsigned int bounce_size;
	struct mutex domain_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;
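
/*
 * Two injection workqueues are used below: vduse_irq_wq handles callback
 * injection that is not bound to a specific CPU (IRQ_UNBOUND), while
 * vduse_irq_bound_wq runs injection work on the CPU selected from a
 * virtqueue's irq_affinity mask (see vduse_dev_queue_irq_work()).
 */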

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}
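
/*
 * Control-message flow: vduse_dev_msg_sync() queues a vduse_dev_request on
 * send_list and wakes the userspace daemon, which reads it from the device
 * fd and later writes back a vduse_dev_response carrying the same
 * request_id. A missing reply within msg_timeout seconds marks the device
 * as broken. A minimal sketch of the userspace side (illustrative only,
 * error handling omitted):
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	read(dev_fd, &req, sizeof(req));
 *	resp.request_id = req.request_id;
 *	resp.result = VDUSE_REQ_RESULT_OK;
 *	// fill resp.vq_state for VDUSE_GET_VQ_STATE requests
 *	write(dev_fd, &resp, sizeof(resp));
 */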

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain && domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	vq->cb.trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}
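
/*
 * The callback registered above is fired either directly through the
 * cb.trigger eventfd (see vduse_vq_signal_irqfd()) or, when no trigger is
 * available, from the injection work queued by VDUSE_VQ_INJECT_IRQ.
 */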

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->num = num;
}

static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (vq->num)
		return vq->num;
	else
		return vq->num_max;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i]->num_max)
			num_max = dev->vqs[i]->num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (cpu_mask)
		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
	else
		cpumask_setall(&dev->vqs[idx]->irq_affinity);

	return 0;
}

static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return &dev->vqs[idx]->irq_affinity;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}
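
/*
 * vDPA config ops backing the emulated device. Most of them only mirror
 * state cached in struct vduse_dev; the ones that need the userspace
 * daemon (status, vq state, IOTLB updates) go through vduse_dev_msg_sync().
 */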

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address		= vduse_vdpa_set_vq_address,
	.kick_vq		= vduse_vdpa_kick_vq,
	.set_vq_cb		= vduse_vdpa_set_vq_cb,
	.set_vq_num		= vduse_vdpa_set_vq_num,
	.get_vq_size		= vduse_vdpa_get_vq_size,
	.set_vq_ready		= vduse_vdpa_set_vq_ready,
	.get_vq_ready		= vduse_vdpa_get_vq_ready,
	.set_vq_state		= vduse_vdpa_set_vq_state,
	.get_vq_state		= vduse_vdpa_get_vq_state,
	.get_vq_align		= vduse_vdpa_get_vq_align,
	.get_device_features	= vduse_vdpa_get_device_features,
	.set_driver_features	= vduse_vdpa_set_driver_features,
	.get_driver_features	= vduse_vdpa_get_driver_features,
	.set_config_cb		= vduse_vdpa_set_config_cb,
	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
	.get_device_id		= vduse_vdpa_get_device_id,
	.get_vendor_id		= vduse_vdpa_get_vendor_id,
	.get_status		= vduse_vdpa_get_status,
	.set_status		= vduse_vdpa_set_status,
	.get_config_size	= vduse_vdpa_get_config_size,
	.get_config		= vduse_vdpa_get_config,
	.set_config		= vduse_vdpa_set_config,
	.get_generation		= vduse_vdpa_get_generation,
	.set_vq_affinity	= vduse_vdpa_set_vq_affinity,
	.get_vq_affinity	= vduse_vdpa_get_vq_affinity,
	.reset			= vduse_vdpa_reset,
	.set_map		= vduse_vdpa_set_map,
	.free			= vduse_vdpa_free,
};

static void vduse_dev_sync_single_for_device(struct device *dev,
					     dma_addr_t dma_addr, size_t size,
					     enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
}

static void vduse_dev_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dma_addr, size_t size,
					  enum dma_data_direction dir)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
}

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.sync_single_for_device = vduse_dev_sync_single_for_device,
	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};
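
/*
 * These DMA ops route all buffer mappings of the child vDPA device into
 * the VDUSE IOVA domain, so that the data ends up in the bounce buffer
 * (or in userspace-registered memory) that the daemon can reach through
 * the VDUSE_IOTLB_* interface.
 */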

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i]->num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_bh(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_bh(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_bh(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_bh(&vq->irq_lock);
}

static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
{
	bool signal = false;

	if (!vq->cb.trigger)
		return false;

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.trigger) {
		eventfd_signal(vq->cb.trigger);
		signal = true;
	}
	spin_unlock_irq(&vq->irq_lock);

	return signal;
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work,
				    int irq_effective_cpu)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	if (irq_effective_cpu == IRQ_UNBOUND)
		queue_work(vduse_irq_wq, irq_work);
	else
		queue_work_on(irq_effective_cpu,
			      vduse_irq_bound_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (!dev->domain)
		goto unlock;

	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}
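
/*
 * VDUSE_IOTLB_REG_UMEM lets the daemon register its own memory to back the
 * whole bounce buffer region (iova 0 .. bounce_size - 1), so that bounced
 * data is reachable directly through the daemon's own mapping. The pinned
 * pages are accounted against RLIMIT_MEMLOCK and released again in
 * vduse_dev_dereg_umem().
 */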

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain || !dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
{
	int curr_cpu = vq->irq_effective_cpu;

	while (true) {
		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
		if (cpu_online(curr_cpu))
			break;

		if (curr_cpu >= nr_cpu_ids)
			curr_cpu = IRQ_UNBOUND;
	}

	vq->irq_effective_cpu = curr_cpu;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURES_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
			vduse_vq_update_effective_cpu(dev->vqs[index]);
			ret = vduse_dev_queue_irq_work(dev,
						&dev->vqs[index]->inject,
						dev->vqs[index]->irq_effective_cpu);
		}
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;
		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (dev->domain->bounce_map && map->start == 0 &&
			    map->last == dev->domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	mutex_unlock(&dev->domain_lock);
	spin_lock(&dev->msg_lock);
	/* Make sure the in-flight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_dev_open,
	.release	= vduse_dev_release,
	.read_iter	= vduse_dev_read_iter,
	.write_iter	= vduse_dev_write_iter,
	.poll		= vduse_dev_poll,
	.unlocked_ioctl	= vduse_dev_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
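
/*
 * Typical life cycle of a VDUSE device, as seen from userspace (a rough
 * sketch under the UAPI above, not a complete program; "foo" stands for
 * the chosen config.name):
 *
 *	ctrl_fd = open("/dev/vduse/control", O_RDWR);
 *	ioctl(ctrl_fd, VDUSE_CREATE_DEV, &dev_config);	// config space appended
 *	dev_fd = open("/dev/vduse/foo", O_RDWR);
 *	ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_config);	// once per virtqueue
 *	// then: read() requests / write() responses, handle kickfds, and
 *	// inject interrupts with VDUSE_VQ_INJECT_IRQ.
 *
 * The device only becomes visible on the vDPA bus once it is added through
 * the vduse management device (see vdpa_dev_add() below).
 */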

static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
{
	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
}

static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
				     const char *buf, size_t count)
{
	cpumask_var_t new_value;
	int ret;

	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parse(buf, new_value);
	if (ret)
		goto free_mask;

	ret = -EINVAL;
	if (!cpumask_intersects(new_value, cpu_online_mask))
		goto free_mask;

	cpumask_copy(&vq->irq_affinity, new_value);
	ret = count;
free_mask:
	free_cpumask_var(new_value);
	return ret;
}

struct vq_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			 size_t count);
};

static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vq);

static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
			    char *buf)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->show)
		return -EIO;

	return entry->show(vq, buf);
}

static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
			     const char *buf, size_t count)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	struct vq_sysfs_entry *entry = container_of(attr,
					struct vq_sysfs_entry, attr);

	if (!entry->store)
		return -EIO;

	return entry->store(vq, buf, count);
}

static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};

static void vq_release(struct kobject *kobj)
{
	struct vduse_virtqueue *vq = container_of(kobj,
					struct vduse_virtqueue, kobj);
	kfree(vq);
}

static const struct kobj_type vq_type = {
	.release	= vq_release,
	.sysfs_ops	= &vq_sysfs_ops,
	.default_groups	= vq_groups,
};

static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};

static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
	int i;

	if (!dev->vqs)
		return;

	for (i = 0; i < dev->vq_num; i++)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
}
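
/*
 * vduse_dev_init_vqs() allocates one vduse_virtqueue per queue and adds a
 * "vqN" kobject under the VDUSE device, so the irq_cb_affinity attribute
 * defined above is exposed per queue in the device's sysfs directory.
 */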

static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i]) {
			ret = -ENOMEM;
			goto err;
		}

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);

		kobject_init(&dev->vqs[i]->kobj, &vq_type);
		ret = kobject_add(&dev->vqs[i]->kobj,
				  &dev->dev->kobj, "vq%d", i);
		if (ret) {
			kfree(dev->vqs[i]);
			goto err;
		}
	}

	return 0;
err:
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	mutex_init(&dev->domain_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	vduse_dev_deinit_vqs(dev);
	if (dev->domain)
		vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(struct vduse_dev_config *config)
{
	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if ((config->device_id == VIRTIO_ID_BLOCK) &&
	    (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
		return false;
	else if ((config->device_id == VIRTIO_ID_NET) &&
		 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
		return false;

	if ((config->device_id == VIRTIO_ID_NET) &&
	    !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!config->name[0])
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static ssize_t bounce_size_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->bounce_size);
}

static ssize_t bounce_size_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	unsigned int bounce_size;
	int ret;

	ret = -EPERM;
	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		goto unlock;

	ret = kstrtouint(buf, 10, &bounce_size);
	if (ret < 0)
		goto unlock;

	ret = -EINVAL;
	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
		goto unlock;

	dev->bounce_size = bounce_size & PAGE_MASK;
	ret = count;
unlock:
	mutex_unlock(&dev->domain_lock);
	return ret;
}

static DEVICE_ATTR_RW(bounce_size);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int ret;
	struct vduse_dev *dev;

	ret = -EPERM;
	if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
		goto err;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;
err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (!vduse_validate_config(&config))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}
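
/*
 * vdpa_dev_add() is called when the device is attached to the vDPA bus
 * (e.g. via the vdpa netlink management interface). It requires that
 * userspace has already configured every virtqueue (vduse_dev_is_ready())
 * and creates the IOVA domain on first use.
 */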

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				       WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);