// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;
	void __iomem *mmio_base;

	bool exclusive_vectors;

	struct wait_queue_head inout_wq;

	void *data_buffer;
	dma_addr_t data_buffer_base;
	void *tx_buffer;
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data. */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

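/*
 * Return the context ID of this guest. The ID is fetched from the
 * hypervisor on first use via a VMCI_GET_CONTEXT_ID datagram and then
 * cached; vmci_guest_cid_update() refreshes it when the hypervisor
 * signals that the ID changed (e.g. after the VM is resumed).
 */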
u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
			vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					 VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

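		/*
		 * Writing the low half of the buffer address kicks off the
		 * transfer; the high half was programmed into
		 * VMCI_DATA_IN_HIGH_ADDR at probe time. The device sets the
		 * busy flag to 1 and signals completion through the DMA
		 * datagram interrupt, which wakes inout_wq below.
		 */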
		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}

static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

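		/*
		 * As with the receive path, only the low half of the buffer
		 * address is written here; VMCI_DATA_OUT_HIGH_ADDR was set
		 * up at probe time. The device clears the busy flag once it
		 * has consumed the datagram.
		 */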
		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g)
		result = vmci_write_data(vmci_dev_g, dg);
	else
		result = VMCI_ERROR_UNAVAILABLE;

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

/*
 * Gets called with the new context id if updated or resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
			VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
						dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
						dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
					       remaining_bytes,
					       current_dg_in_buffer_size -
					       remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

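			/*
			 * Drain the oversized datagram one buffer-sized chunk
			 * at a time until its remaining bytes fit within a
			 * single read.
			 */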
			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
			(size_t)(dg_in_buffer + current_dg_in_buffer_size -
				 (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				       current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

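/*
 * Free the datagram buffers allocated at probe time: the coherent DMA
 * send/receive buffers when the device is operated through MMIO, or the
 * vmalloc'ed receive buffer when it is operated through the IO ports.
 */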
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, the driver uses the new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	init_waitqueue_head(&vmci_dev->inout_wq);

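	/*
	 * Devices operated through MMIO use coherent DMA buffers for both
	 * directions: tx_buffer for outgoing datagrams and data_buffer for
	 * incoming ones, each sized for a maximum datagram plus the in/out
	 * header. IO port devices only need a receive buffer; sends are
	 * written directly from the caller's datagram in vmci_write_data().
	 */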
	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

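	/*
	 * For DMA datagram capable devices, tell the device the guest page
	 * size and the upper halves of the send/receive buffer addresses up
	 * front; the lower halves are written per transfer in
	 * vmci_read_data() and vmci_write_data().
	 */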
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts. Try MSI-X first, then MSI, and then fallback on
	 * legacy interrupts.
	 */
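	/*
	 * Devices with MMIO/DMA datagram support also expose a vector for
	 * DMA datagram completion, so ask for the full set of vectors;
	 * older IO port devices only have the datagram and notification
	 * vectors.
	 */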
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
				     vmci_interrupt, IRQF_SHARED,
				     KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine. This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
					     vmci_interrupt_bm, 0,
					     KBUILD_MODNAME, vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_threaded_irq(pci_irq_vector(pdev, 2),
						     NULL,
						     vmci_interrupt_dma_datagram,
						     0, KBUILD_MODNAME,
						     vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

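/*
 * Entry points called from the VMCI driver core (vmci_driver.c) during
 * module init/exit; this file does not register its own module hooks.
 */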
int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}