// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio vhost-user driver
 *
 * Copyright(c) 2019 Intel Corporation
 *
 * This driver allows virtio devices to be used over a vhost-user socket.
 *
 * Guest devices can be instantiated by kernel module or command line
 * parameters. One device will be created for each parameter. Syntax:
 *
 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
 * where:
 *		<socket>	:= vhost-user socket path to connect
 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
 *		<platform_id>	:= (optional) platform device id
 *
 * example:
 *		virtio_uml.device=/var/uml.socket:1
 *
 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/time-internal.h>
#include <linux/virtio-uml.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
#include <os.h>
#include "vhost_user.h"

#define MAX_SUPPORTED_QUEUE_SIZE	256

#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)

struct virtio_uml_platform_data {
	u32 virtio_device_id;
	const char *socket_path;
	struct work_struct conn_broken_wk;
	struct platform_device *pdev;
};

struct virtio_uml_device {
	struct virtio_device vdev;
	struct platform_device *pdev;

	spinlock_t sock_lock;
	int sock, req_fd, irq;
	u64 features;
	u64 protocol_features;
	u8 status;
	u8 registered:1;
	u8 suspended:1;
	u8 no_vq_suspend:1;

	u8 config_changed_irq:1;
	uint64_t vq_irq_vq_map;
};

struct virtio_uml_vq_info {
	int kick_fd, call_fd;
	char name[32];
	bool suspended;
};

extern unsigned long long physmem_size, highmem;

#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)

/* Vhost-user protocol */

static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int rc;

	do {
		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (rc > 0) {
			buf += rc;
			len -= rc;
			fds = NULL;
			fds_num = 0;
		}
	} while (len && (rc >= 0 || rc == -EINTR));

	if (rc < 0)
		return rc;
	return 0;
}
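
/*
 * Read exactly @len bytes from @fd, retrying on -EINTR (and on -EAGAIN
 * unless @abortable). Returns 0 on success, -ECONNRESET if the peer
 * closed the connection, or a negative error code otherwise.
 */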
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}

static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
{
	return full_read(fd, msg, sizeof(msg->header), true);
}

static int vhost_user_recv(struct virtio_uml_device *vu_dev,
			   int fd, struct vhost_user_msg *msg,
			   size_t max_payload_size, bool wait)
{
	size_t size;
	int rc;

	/*
	 * In virtio time-travel mode, we're handling all the vhost-user
	 * FDs by polling them whenever appropriate. However, we may get
	 * into a situation where we're sending out an interrupt message
	 * to a device (e.g. a net device) and need to handle a simulation
	 * time message while doing so, e.g. one that tells us to update
	 * our idea of how long we can run without scheduling.
	 *
	 * Thus, we can't just read() from the given fd; we also need to
	 * handle simulation-time messages. This function does that for us
	 * while waiting for the given fd to become readable.
	 */
	if (wait)
		time_travel_wait_readable(fd);

	rc = vhost_user_recv_header(fd, msg);

	if (rc == -ECONNRESET && vu_dev->registered) {
		struct virtio_uml_platform_data *pdata;

		pdata = vu_dev->pdev->dev.platform_data;

		virtio_break_device(&vu_dev->vdev);
		schedule_work(&pdata->conn_broken_wk);
	}
	if (rc)
		return rc;
	size = msg->header.size;
	if (size > max_payload_size)
		return -EPROTO;
	return full_read(fd, &msg->payload, size, false);
}

static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
				struct vhost_user_msg *msg,
				size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
				 max_payload_size, true);

	if (rc)
		return rc;

	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
		return -EPROTO;

	return 0;
}

static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg msg;
	int rc = vhost_user_recv_resp(vu_dev, &msg,
				      sizeof(msg.payload.integer));

	if (rc)
		return rc;
	if (msg.header.size != sizeof(msg.payload.integer))
		return -EPROTO;
	*value = msg.payload.integer;
	return 0;
}

static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
			       struct vhost_user_msg *msg,
			       size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
				 max_payload_size, false);

	if (rc)
		return rc;

	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
			VHOST_USER_VERSION)
		return -EPROTO;

	return 0;
}
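
/*
 * Send a message on the main socket, optionally passing file
 * descriptors along (e.g. for VHOST_USER_SET_VRING_CALL). The socket
 * lock is held across both the send and the optional REPLY_ACK status
 * read, so request/response pairs of concurrent senders can't
 * interleave.
 */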
static int vhost_user_send(struct virtio_uml_device *vu_dev,
			   bool need_response, struct vhost_user_msg *msg,
			   int *fds, size_t num_fds)
{
	size_t size = sizeof(msg->header) + msg->header.size;
	unsigned long flags;
	bool request_ack;
	int rc;

	msg->header.flags |= VHOST_USER_VERSION;

	/*
	 * The need_response flag indicates that we already expect a
	 * response, e.g. when reading the features. In that case an
	 * additional ACK is meaningless, so don't request one. Also,
	 * only request an ACK if the slave supports it (REPLY_ACK).
	 */
	request_ack = !need_response;
	if (!(vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
		request_ack = false;

	if (request_ack)
		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;

	spin_lock_irqsave(&vu_dev->sock_lock, flags);
	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
	if (rc < 0)
		goto out;

	if (request_ack) {
		uint64_t status;

		rc = vhost_user_recv_u64(vu_dev, &status);
		if (rc)
			goto out;

		if (status) {
			vu_err(vu_dev, "slave reports error: %llu\n", status);
			rc = -EIO;
			goto out;
		}
	}

out:
	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	return rc;
}

static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      bool need_response, u32 request)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
}

static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
					 u32 request, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = value,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
{
	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
}

static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, features);
}

static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
}

static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
			VHOST_USER_GET_PROTOCOL_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, protocol_features);
}

static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 protocol_features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
				   protocol_features);
}

static void vhost_user_reply(struct virtio_uml_device *vu_dev,
			     struct vhost_user_msg *msg, int response)
{
	struct vhost_user_msg reply = {
		.payload.integer = response,
	};
	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
	int rc;

	reply.header = msg->header;
	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
	reply.header.flags |= VHOST_USER_FLAG_REPLY;
	reply.header.size = sizeof(reply.payload.integer);

	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);

	if (rc)
		vu_err(vu_dev,
		       "sending reply to slave request failed: %d (size %zu)\n",
		       rc, size);
}
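
/*
 * Handle a single request arriving from the slave on the request fd:
 * either a config-change notification or an in-band vring call. If
 * the slave set VHOST_USER_FLAG_NEED_REPLY, a zero (success) or
 * non-zero (failure) status is sent back.
 */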
static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
				       struct time_travel_event *ev)
{
	struct virtqueue *vq;
	int response = 1;
	struct {
		struct vhost_user_msg msg;
		u8 extra_payload[512];
	} msg;
	int rc;

	rc = vhost_user_recv_req(vu_dev, &msg.msg,
				 sizeof(msg.msg.payload) +
				 sizeof(msg.extra_payload));

	if (rc)
		return IRQ_NONE;

	switch (msg.msg.header.request) {
	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
		vu_dev->config_changed_irq = true;
		response = 0;
		break;
	case VHOST_USER_SLAVE_VRING_CALL:
		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vq->index == msg.msg.payload.vring_state.index) {
				response = 0;
				vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
				break;
			}
		}
		break;
	case VHOST_USER_SLAVE_IOTLB_MSG:
		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
	default:
		vu_err(vu_dev, "unexpected slave request %d\n",
		       msg.msg.header.request);
	}

	if (ev && !vu_dev->suspended)
		time_travel_add_irq_event(ev);

	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
		vhost_user_reply(vu_dev, &msg.msg, response);

	return IRQ_HANDLED;
}
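
/*
 * IRQ handler for the request fd. When the time-travel handler isn't
 * in use, the message is read here; any vring-call events latched in
 * vq_irq_vq_map (or a pending config change) are then dispatched to
 * the virtio core.
 */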
static irqreturn_t vu_req_interrupt(int irq, void *data)
{
	struct virtio_uml_device *vu_dev = data;
	irqreturn_t ret = IRQ_HANDLED;

	if (!um_irq_timetravel_handler_used())
		ret = vu_req_read_message(vu_dev, NULL);

	if (vu_dev->vq_irq_vq_map) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
				vring_interrupt(0 /* ignored */, vq);
		}
		vu_dev->vq_irq_vq_map = 0;
	} else if (vu_dev->config_changed_irq) {
		virtio_config_changed(&vu_dev->vdev);
		vu_dev->config_changed_irq = false;
	}

	return ret;
}

static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
					  struct time_travel_event *ev)
{
	vu_req_read_message(data, ev);
}

static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
{
	int rc, req_fds[2];

	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
	rc = os_pipe(req_fds, true, true);
	if (rc < 0)
		return rc;
	vu_dev->req_fd = req_fds[0];

	rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
			       vu_req_interrupt, IRQF_SHARED,
			       vu_dev->pdev->name, vu_dev,
			       vu_req_interrupt_comm_handler);
	if (rc < 0)
		goto err_close;

	vu_dev->irq = rc;

	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
					   req_fds[1]);
	if (rc)
		goto err_free_irq;

	goto out;

err_free_irq:
	um_free_irq(vu_dev->irq, vu_dev);
err_close:
	os_close_file(req_fds[0]);
out:
	/* Close unused write end of request fds */
	os_close_file(req_fds[1]);
	return rc;
}

static int vhost_user_init(struct virtio_uml_device *vu_dev)
{
	int rc = vhost_user_set_owner(vu_dev);

	if (rc)
		return rc;
	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
	if (rc)
		return rc;

	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = vhost_user_get_protocol_features(vu_dev,
				&vu_dev->protocol_features);
		if (rc)
			return rc;
		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
		rc = vhost_user_set_protocol_features(vu_dev,
				vu_dev->protocol_features);
		if (rc)
			return rc;
	}

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		rc = vhost_user_init_slave_req(vu_dev);
		if (rc)
			return rc;
	}

	return 0;
}
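
/*
 * Read @len bytes of config space at @offset. Note that the request
 * always fetches the whole range [0, offset + len) from the slave;
 * the requested window is then copied out of the returned payload.
 */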
static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}
	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}

static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + len;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_SET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = offset;
	msg->payload.config.size = len;
	memcpy(msg->payload.config.payload, buf, len);

	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
	if (rc)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       rc);

	kfree(msg);
}
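
/*
 * Fill in one memory region descriptor for the given physical range.
 * phys_mapping() returns the fd backing the address, which is passed
 * to the slave so it can mmap() the same file; the second lookup at
 * the end of the range verifies that the whole region is backed by
 * that same fd.
 */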
static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long mem_offset;
	int rc = phys_mapping(addr, &mem_offset);

	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
		return -EFAULT;
	*fd_out = rc;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = mem_offset;

	/* Ensure mapping is valid for the entire region */
	rc = phys_mapping(addr + size - 1, &mem_offset);
	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, rc, *fd_out))
		return -EFAULT;
	return 0;
}

static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_MEM_TABLE,
		.header.size = sizeof(msg.payload.mem_regions),
		.payload.mem_regions.num = 1,
	};
	unsigned long reserved = uml_reserved - uml_physmem;
	int fds[2];
	int rc;

	/*
	 * This is a bit tricky, see also the comment with setup_physmem().
	 *
	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
	 * but the code and data we *already* have is omitted. To us, this
	 * makes no difference, since both become part of our address
	 * space and memory consumption. To somebody looking in from the
	 * outside, however, it is different because the part of our memory
	 * consumption that's already part of the binary (code/data) is not
	 * mapped from the file, so it's not visible to another mmap from
	 * the file descriptor.
	 *
	 * Thus, don't advertise this space to the vhost-user slave. This
	 * means that the slave will likely abort or similar when we give
	 * it an address from the hidden range, since it's not marked as
	 * a valid address, but at least that way we detect the issue and
	 * don't just have the slave read an all-zeroes buffer from the
	 * shared memory file, or write something there that we can never
	 * see (depending on the direction of the virtqueue traffic.)
	 *
	 * Since we usually don't want to use .text for virtio buffers,
	 * this effectively means that you cannot use
	 *  1) global variables, which are in the .bss and not in the shm
	 *     file-backed memory
	 *  2) the stack in some processes, depending on where they have
	 *     their stack (or maybe only no interrupt stack?)
	 *
	 * The stack is already not typically valid for DMA, so this isn't
	 * much of a restriction, but global variables might be encountered.
	 *
	 * It might be possible to fix it by copying around the data that's
	 * between bss_start and where we map the file now, but it's not
	 * something that you typically encounter with virtio drivers, so
	 * it didn't seem worthwhile.
	 */
	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
					&fds[0],
					&msg.payload.mem_regions.regions[0]);
	if (rc < 0)
		return rc;
	if (highmem) {
		msg.payload.mem_regions.num++;
		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
						&fds[1],
						&msg.payload.mem_regions.regions[1]);
		if (rc < 0)
			return rc;
	}

	return vhost_user_send(vu_dev, false, &msg, fds,
			       msg.payload.mem_regions.num);
}

static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.vring_state),
		.payload.vring_state.index = index,
		.payload.vring_state.num = num,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
					  index, num);
}

static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
					  index, offset);
}

static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_VRING_ADDR,
		.header.size = sizeof(msg.payload.vring_addr),
		.payload.vring_addr.index = index,
		.payload.vring_addr.desc = desc,
		.payload.vring_addr.used = used,
		.payload.vring_addr.avail = avail,
		.payload.vring_addr.log = log,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
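
/*
 * Attach a kick or call fd to a vring. For a negative fd, the
 * VHOST_USER_VRING_POLL_MASK bit is set in the payload instead of
 * attaching an fd, telling the slave that no file descriptor
 * accompanies the message (judging by the mask's name, the slave is
 * then expected to poll).
 */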
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = index,
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;
	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
	}
	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
				       index, fd);
}

static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
				       index, fd);
}

static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
				       u32 index, bool enable)
{
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
					  index, enable);
}


/* Virtio interface */
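
/*
 * Kick the slave about new buffers: write to the per-queue kick
 * eventfd, or, with in-band notifications (kick_fd == -1), send a
 * VHOST_USER_VRING_KICK message over the socket instead. Suspended
 * queues report success without kicking.
 */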
static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t n = 1;
	int rc;

	if (info->suspended)
		return true;

	time_travel_propagate_time();

	if (info->kick_fd < 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
						  vq->index, 0) == 0;
	}

	do {
		rc = os_write_file(info->kick_fd, &n, sizeof(n));
	} while (rc == -EINTR);
	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
}

static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	uint64_t n;
	int rc;
	irqreturn_t ret = IRQ_NONE;

	do {
		rc = os_read_file(info->call_fd, &n, sizeof(n));
		if (rc == sizeof(n))
			ret |= vring_interrupt(irq, vq);
	} while (rc == sizeof(n) || rc == -EINTR);
	WARN(rc != -EAGAIN, "read returned %d\n", rc);
	return ret;
}

static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_get_config(vu_dev, offset, buf, len);
}

static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_set_config(vu_dev, offset, buf, len);
}

static u8 vu_get_status(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->status;
}

static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = status;
}

static void vu_reset(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = 0;
}

static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	if (info->call_fd >= 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}

	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(info);
}

static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *n;
	u64 features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/* Ensure previous messages have been processed */
	WARN_ON(vhost_user_get_features(vu_dev, &features));

	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
		vu_del_vq(vq);
}

static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int call_fds[2];
	int rc;

	/* no call FD needed/desired in this case */
	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
	    vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		info->call_fd = -1;
		return 0;
	}

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(call_fds, true, true);
	if (rc < 0)
		return rc;

	info->call_fd = call_fds[0];
	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
			    vu_interrupt, IRQF_SHARED, info->name, vq);
	if (rc < 0)
		goto close_both;

	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
	if (rc)
		goto release_irq;

	goto out;

release_irq:
	um_free_irq(vu_dev->irq, vq);
close_both:
	os_close_file(call_fds[0]);
out:
	/* Close (unused) write end of call fds */
	os_close_file(call_fds[1]);

	return rc;
}

/*
 * Create one virtqueue and register it with the slave: allocate the
 * ring, set up the kick and call fds (unless in-band notifications
 * are used), then send the VRING_NUM/BASE/ADDR configuration. The
 * kick fd itself is only handed to the slave later, from
 * vu_find_vqs(), once all queues exist.
 */
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int num = MAX_SUPPORTED_QUEUE_SIZE;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto error_kzalloc;
	}
	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
				    ctx, vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto error_create;
	}
	vq->priv = info;
	num = virtqueue_get_vring_size(vq);

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
		info->kick_fd = -1;
	} else {
		rc = os_eventfd(0, 0);
		if (rc < 0)
			goto error_kick;
		info->kick_fd = rc;
	}

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto error_call;

	rc = vhost_user_set_vring_num(vu_dev, index, num);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_addr(vu_dev, index,
				       virtqueue_get_desc_addr(vq),
				       virtqueue_get_used_addr(vq),
				       virtqueue_get_avail_addr(vq),
				       (u64) -1);
	if (rc)
		goto error_setup;

	return vq;

error_setup:
	if (info->call_fd >= 0) {
		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}
error_call:
	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);
error_kick:
	vring_del_virtqueue(vq);
error_create:
	kfree(info);
error_kzalloc:
	return ERR_PTR(rc);
}

static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	int i, queue_idx = 0, rc;
	struct virtqueue *vq;

	/* not supported for now */
	if (WARN_ON(nvqs > 64))
		return -EINVAL;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false);
		if (IS_ERR(vqs[i])) {
			rc = PTR_ERR(vqs[i]);
			goto error_setup;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		if (info->kick_fd >= 0) {
			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
						       info->kick_fd);
			if (rc)
				goto error_setup;
		}

		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (rc)
			goto error_setup;
	}

	return 0;

error_setup:
	vu_del_vqs(vdev);
	return rc;
}

static u64 vu_get_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->features;
}

static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = vdev->features | supported;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}

static const char *vu_bus_name(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->pdev->name;
}

static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};

static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
			container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	/* might not have been opened due to not negotiating the feature */
	if (vu_dev->req_fd >= 0) {
		um_free_irq(vu_dev->irq, vu_dev);
		os_close_file(vu_dev->req_fd);
	}

	os_close_file(vu_dev->sock);
	kfree(vu_dev);
}

/**
 * virtio_uml_set_no_vq_suspend - control vring suspension behaviour
 * @vdev: the virtio device, which must be a virtio-uml device
 * @no_vq_suspend: if true, keep the vrings enabled across suspend/resume
 *
 * With no_vq_suspend set, virtio_uml_suspend()/resume() leave the
 * device's vrings enabled instead of disabling them for the duration
 * of the suspend.
 */
void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
				  bool no_vq_suspend)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
		return;

	vu_dev->no_vq_suspend = no_vq_suspend;
	dev_info(&vdev->dev, "%sabled VQ suspend\n",
		 no_vq_suspend ? "dis" : "en");
}

/* Platform device */

static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata)
		return -EINVAL;

	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;
	vu_dev->req_fd = -1;

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0)
		goto error_free;
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	device_set_wakeup_capable(&vu_dev->vdev.dev, true);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc) {
		/*
		 * put_device() drops the final reference and frees vu_dev
		 * via virtio_uml_release_dev(), so vu_dev must not be
		 * touched afterwards.
		 */
		put_device(&vu_dev->vdev.dev);
		return rc;
	}
	vu_dev->registered = 1;
	return 0;

error_init:
	os_close_file(vu_dev->sock);
error_free:
	kfree(vu_dev);
	return rc;
}

static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	unregister_virtio_device(&vu_dev->vdev);
	return 0;
}

/* Command line device list */

static void vu_cmdline_release_dev(struct device *d)
{
}

static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};

static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;

static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;

	kfree(pdata->socket_path);
	platform_device_unregister(pdev);
	return 0;
}

static void vu_conn_broken(struct work_struct *wk)
{
	struct virtio_uml_platform_data *pdata;

	pdata = container_of(wk, struct virtio_uml_platform_data,
			     conn_broken_wk);
	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
}
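
/*
 * Parse one "<socket>:<virtio_id>[:<platform_id>]" module parameter
 * and register a matching platform device. If no explicit
 * <platform_id> is given, ids are assigned sequentially via
 * vu_cmdline_id.
 */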
static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata, *ppdata;
	struct platform_device *pdev;

	if (!ids || ids == device)
		return -EINVAL;

	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;

	ppdata = pdev->dev.platform_data;
	ppdata->pdev = pdev;
	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);

	return 0;

free:
	kfree(socket_path);
	return err;
}

static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}

static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	buffer[0] = '\0';
	if (vu_cmdline_parent_registered)
		device_for_each_child(&vu_cmdline_parent, buffer,
				      vu_cmdline_get_device);
	return strlen(buffer) + 1;
}

static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};

device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);


static void vu_unregister_cmdline_devices(void)
{
	if (vu_cmdline_parent_registered) {
		device_for_each_child(&vu_cmdline_parent, NULL,
				      vu_unregister_cmdline_device);
		device_unregister(&vu_cmdline_parent);
		vu_cmdline_parent_registered = false;
	}
}

/* Platform driver */

static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);

static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = true;
			vhost_user_set_vring_enable(vu_dev, vq->index, false);
		}
	}

	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
		vu_dev->suspended = true;
		return 0;
	}

	return irq_set_irq_wake(vu_dev->irq, 1);
}

static int virtio_uml_resume(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = false;
			vhost_user_set_vring_enable(vu_dev, vq->index, true);
		}
	}

	vu_dev->suspended = false;

	if (!device_may_wakeup(&vu_dev->vdev.dev))
		return 0;

	return irq_set_irq_wake(vu_dev->irq, 0);
}

static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
	.suspend = virtio_uml_suspend,
	.resume = virtio_uml_resume,
};

static int __init virtio_uml_init(void)
{
	return platform_driver_register(&virtio_uml_driver);
}

static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}

module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");