1 /*- 2 * Copyright (c) 2016 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/types.h> 33 #include <sys/malloc.h> 34 #include <sys/module.h> 35 #include <sys/kernel.h> 36 #include <sys/queue.h> 37 #include <sys/lock.h> 38 #include <sys/sx.h> 39 #include <sys/smp.h> 40 #include <sys/sysctl.h> 41 #include <sys/bus.h> 42 #include <sys/rman.h> 43 #include <sys/mutex.h> 44 #include <sys/errno.h> 45 46 #include <vm/vm.h> 47 #include <vm/vm_param.h> 48 #include <vm/vm_kern.h> 49 #include <vm/pmap.h> 50 51 #include <machine/atomic.h> 52 #include <machine/bus.h> 53 #include <machine/frame.h> 54 #include <machine/pci_cfgreg.h> 55 #include <machine/resource.h> 56 57 #include <sys/pciio.h> 58 #include <dev/pci/pcireg.h> 59 #include <dev/pci/pcivar.h> 60 #include <dev/pci/pci_private.h> 61 #include <dev/pci/pcib_private.h> 62 #include "pcib_if.h" 63 64 #include <machine/intr_machdep.h> 65 #include <x86/apicreg.h> 66 67 #include <dev/hyperv/include/hyperv.h> 68 #include <dev/hyperv/include/hyperv_busdma.h> 69 #include <dev/hyperv/include/vmbus_xact.h> 70 #include <dev/hyperv/vmbus/vmbus_reg.h> 71 #include <dev/hyperv/vmbus/vmbus_chanvar.h> 72 73 #include "vmbus_if.h" 74 75 #if __FreeBSD_version < 1100000 76 typedef u_long rman_res_t; 77 #define RM_MAX_END (~(rman_res_t)0) 78 #endif 79 80 struct completion { 81 unsigned int done; 82 struct mtx lock; 83 }; 84 85 static void 86 init_completion(struct completion *c) 87 { 88 memset(c, 0, sizeof(*c)); 89 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF); 90 c->done = 0; 91 } 92 93 static void 94 free_completion(struct completion *c) 95 { 96 mtx_destroy(&c->lock); 97 } 98 99 static void 100 complete(struct completion *c) 101 { 102 mtx_lock(&c->lock); 103 c->done++; 104 mtx_unlock(&c->lock); 105 wakeup(c); 106 } 107 108 static void 109 wait_for_completion(struct completion *c) 110 { 111 mtx_lock(&c->lock); 112 while (c->done == 0) 113 mtx_sleep(c, &c->lock, 0, "hvwfc", 0); 114 c->done--; 
115 mtx_unlock(&c->lock); 116 } 117 118 #define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (major))) 119 120 enum { 121 PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), 122 PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 123 }; 124 125 #define PCI_CONFIG_MMIO_LENGTH 0x2000 126 #define CFG_PAGE_OFFSET 0x1000 127 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) 128 129 /* 130 * Message Types 131 */ 132 133 enum pci_message_type { 134 /* 135 * Version 1.1 136 */ 137 PCI_MESSAGE_BASE = 0x42490000, 138 PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0, 139 PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1, 140 PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4, 141 PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5, 142 PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6, 143 PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7, 144 PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8, 145 PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9, 146 PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA, 147 PCI_EJECT = PCI_MESSAGE_BASE + 0xB, 148 PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC, 149 PCI_REENABLE = PCI_MESSAGE_BASE + 0xD, 150 PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE, 151 PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF, 152 PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10, 153 PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11, 154 PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12, 155 PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13, 156 PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14, 157 PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15, 158 PCI_MESSAGE_MAXIMUM 159 }; 160 161 /* 162 * Structures defining the virtual PCI Express protocol. 163 */ 164 165 union pci_version { 166 struct { 167 uint16_t minor_version; 168 uint16_t major_version; 169 } parts; 170 uint32_t version; 171 } __packed; 172 173 /* 174 * This representation is the one used in Windows, which is 175 * what is expected when sending this back and forth with 176 * the Hyper-V parent partition. 
 */
union win_slot_encoding {
	struct {
		uint32_t slot:5;	/* PCI slot (device) number */
		uint32_t func:3;	/* PCI function number */
		uint32_t reserved:24;
	} bits;
	uint32_t val;
} __packed;

/* Description of one child function, as reported by the host. */
struct pci_func_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
} __packed;

/* MSI parameters sent to the host when creating an interrupt. */
struct hv_msi_desc {
	uint8_t vector;
	uint8_t delivery_mode;
	uint16_t vector_count;
	uint32_t reserved;
	uint64_t cpu_mask;
} __packed;

/* Translated interrupt descriptor returned by the host. */
struct tran_int_desc {
	uint16_t reserved;
	uint16_t vector_count;
	uint32_t data;
	uint64_t address;
} __packed;

/* Common header for every guest-to-host message. */
struct pci_message {
	uint32_t type;
} __packed;

/* Message addressed to a specific child function via its wslot. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

/* Host-to-guest inband message, prefixed by the VMBus packet header. */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

/* Host's completion response for a request sent with VMBUS_CHANPKT_FLAG_RC. */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * In-memory wrapper around an outgoing message: the completion callback and
 * its context live in front of the variable-length wire message.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];	/* wire message starts here */
};

/*
 * Specific message types supporting the PCI protocol.
 */

struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t is_last_attempt:1;
	uint32_t reservedz:31;
} __packed;

struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;	/* guest-physical base of the config MMIO window */
} __packed;

/* PCI_BUS_RELATIONS payload: device_count function descriptors follow. */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS];	/* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

/* PCI_EJECT notification from the host, naming the ejected slot. */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

/* PCI_EJECTION_COMPLETE acknowledgement sent back to the host. */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;

/*
 * Driver specific state.
 */

enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-bus state for one virtual PCI bus exposed over a VMBus channel. */
struct hv_pcibus {
	device_t pcib;
	device_t pci_bus;
	struct vmbus_pcib_softc *sc;

	uint16_t pci_domain;

	enum hv_pcibus_state state;

	struct resource *cfg_res;	/* config-space MMIO window */

	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock; /* Protect lists below */
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	volatile int detaching;	/* set (atomically) once detach has begun */
};

/* Per-child-function state. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct pci_func_desc desc;

	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be both
 * processed in order.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;
};

/* One queued "Device Relations" snapshot: device_count descriptors follow. */
struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;
	uint32_t device_count;
	struct pci_func_desc func[0];
};

/* Association between an allocated IRQ and its host-translated descriptor. */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;
	struct tran_int_desc desc;
	int irq;
};

/* Linux-style devfn encoding: slot in bits 7..3, function in bits 2..0. */
#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)	(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)	((devfn) & 0x07)

/* Convert a devfn into the Windows wslot encoding used on the wire. */
static uint32_t
devfn_to_wslot(unsigned int devfn)
{
	union win_slot_encoding wslot;

	wslot.val = 0;
	wslot.bits.slot = PCI_SLOT(devfn);
	wslot.bits.func = PCI_FUNC(devfn);

	return (wslot.val);
}

/* Convert a Windows wslot encoding back into a devfn. */
static unsigned int
wslot_to_devfn(uint32_t wslot)
{
	union win_slot_encoding encoding;
	unsigned int slot;
	unsigned int func;

	encoding.val = wslot;

	slot = encoding.bits.slot;
	func = encoding.bits.func;

	return (PCI_DEVFN(slot, func));
}

/* Softc for the vmbus_pcib device itself. */
struct vmbus_pcib_softc {
	struct vmbus_channel *chan;
	void *rx_buf;

	struct taskqueue *taskq;

	struct hv_pcibus *hbus;
};

/* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};

/* Completion context for requests that only need the host's status code. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Completion context for PCI_QUERY_RESOURCE_REQUIREMENTS. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};

/* Completion context for PCI_CREATE_INTERRUPT_MESSAGE. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};

/*
 * Generic completion callback: record the host's status (or -1 if the
 * response is too short to contain one) and wake the waiting sender.
 */
static void
hv_pci_generic_compl(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct hv_pci_compl *comp_pkt = context;

	if (resp_packet_size >= sizeof(struct pci_response))
		comp_pkt->completion_status = resp->status;
	else
		comp_pkt->completion_status = -1;

	complete(&comp_pkt->host_event);
}

/*
 * Completion callback for the resource-requirements query: copy the host's
 * probed BAR values into the child device, then wake the sender.
 */
static void
q_resource_requirements(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct q_res_req_compl *completion = context;
	struct pci_q_res_req_response *q_res_req =
	    (struct pci_q_res_req_response *)resp;
	int i;

	if (resp->status < 0) {
		printf("vmbus_pcib: failed to query resource requirements\n");
	} else {
		for (i = 0; i < MAX_NUM_BARS; i++)
			completion->hpdev->probed_bar[i] =
			    q_res_req->probed_bar[i];
	}

	complete(&completion->host_event);
}

/*
 * Completion callback for interrupt creation: capture both the status and
 * the translated interrupt descriptor returned by the host.
 */
static void
hv_pci_compose_compl(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct compose_comp_ctxt *comp_pkt = context;
	struct pci_create_int_response *int_resp =
	    (struct pci_create_int_response *)resp;

	comp_pkt->comp_pkt.completion_status = resp->status;
	comp_pkt->int_desc = int_resp->int_desc;
	complete(&comp_pkt->comp_pkt.host_event);
}

/*
 * Tell the host to delete one translated interrupt, then free the local
 * descriptor.  The send is fire-and-forget (no completion requested).
 */
static void
hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
{
	struct pci_delete_interrupt *int_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
	} ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
	int_pkt->wslot.val = hpdev->desc.wslot.val;
	int_pkt->int_desc = hid->desc;

	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    int_pkt, sizeof(*int_pkt), 0);

	free(hid, M_DEVBUF);
}

/*
 * Tear down one child: delete its newbus device (under Giant, as newbus
 * requires), unlink it from the bus, release its interrupts, and free it.
 */
static void
hv_pci_delete_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct hv_irq_desc *hid, *tmp_hid;
	device_t pci_dev;
	int devfn;

	devfn = wslot_to_devfn(hpdev->desc.wslot.val);

	mtx_lock(&Giant);

	pci_dev = pci_find_dbsf(hbus->pci_domain,
	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
	if (pci_dev)
		device_delete_child(hbus->pci_bus, pci_dev);

	mtx_unlock(&Giant);

	mtx_lock(&hbus->device_list_lock);
	TAILQ_REMOVE(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);

	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
		hv_int_desc_free(hpdev, hid);

	free(hpdev, M_DEVBUF);
}

/*
 * Allocate a new child, ask the host for its resource requirements (which
 * fill in probed_bar[] via q_resource_requirements()), and link it onto the
 * bus's children list.  Returns NULL if the request could not be sent.
 */
static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_child_message)];
	} ctxt;
	int ret;

	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
	hpdev->hbus = hbus;

	TAILQ_INIT(&hpdev->irq_desc_list);

	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;

	ctxt.pkt.compl_ctxt = &comp_pkt;
	ctxt.pkt.completion_func = q_resource_requirements;

	res_req = (struct pci_child_message *)&ctxt.pkt.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.val = desc->wslot.val;

	/*
	 * The stack address of ctxt.pkt is used as the VMBus transaction id;
	 * the channel callback casts it back to run the completion function.
	 * wait_for_completion() below keeps the frame alive until then.
	 */
	ret = vmbus_chan_send(hbus->sc->chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    res_req, sizeof(*res_req), (uint64_t)&ctxt.pkt);
	if (ret)
		goto err;

	wait_for_completion(&comp_pkt.host_event);
	free_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;

	mtx_lock(&hbus->device_list_lock);
	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);
	return (hpdev);
err:
	free_completion(&comp_pkt.host_event);
	free(hpdev, M_DEVBUF);
	return (NULL);
}

#if __FreeBSD_version < 1100000

/* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11.
 */

/* Read one function's config header and, if present, add it as a child. */
static struct pci_devinfo *
pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
    int slot, int func, size_t dinfo_size)
{
	struct pci_devinfo *dinfo;

	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
	if (dinfo != NULL)
		pci_add_child(dev, dinfo);

	return (dinfo);
}

/*
 * Rescan the bus: add device_t's for newly present functions, delete the
 * ones that disappeared, and probe/attach the additions.  (Backport of the
 * FreeBSD 11 BUS_RESCAN() implementation.)
 */
static int
pci_rescan(device_t dev)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_softc *sc;
	device_t child, *devlist, *unchanged;
	int devcount, error, i, j, maxslots, oldcount;
	int busno, domain, s, f, pcifunchigh;
	uint8_t hdrtype;

	/* No need to check for ARI on a rescan. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error)
		return (error);
	if (devcount != 0) {
		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
		    M_NOWAIT | M_ZERO);
		if (unchanged == NULL) {
			free(devlist, M_TEMP);
			return (ENOMEM);
		}
	} else
		unchanged = NULL;

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* If function 0 is not present, skip to the next slot. */
		f = 0;
		if (REG(PCIR_VENDOR, 2) == 0xffff)
			continue;
		pcifunchigh = 0;
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = 0; f <= pcifunchigh; f++) {
			if (REG(PCIR_VENDOR, 2) == 0xffff)
				continue;

			/*
			 * Found a valid function.  Check if a
			 * device_t for this device already exists.
			 */
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			pci_identify_function(pcib, dev, domain, busno, s, f,
			    sizeof(struct pci_devinfo));
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}

#else

/* Modern kernels provide BUS_RESCAN(); just forward to it. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}

#endif

/*
 * Taskqueue handler for queued "Device Relations" snapshots: reconcile the
 * bus's children list against the newest snapshot (older queued ones are
 * discarded), adding new children and deleting vanished ones.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct pci_func_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/* Remove missing device(s), if any */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}

/* Look up a child by its wslot; returns NULL if not found. */
static struct hv_pci_dev *
get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
{
	struct hv_pci_dev *hpdev, *ret = NULL;

	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link) {
		if (hpdev->desc.wslot.val == wslot) {
			ret = hpdev;
			break;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	return (ret);
}

/*
 * Snapshot a PCI_BUS_RELATIONS message into an hv_dr_state, queue it, and
 * schedule pci_devices_present_work() on the driver taskqueue.
 */
static void
hv_pci_devices_present(struct hv_pcibus *hbus,
    struct pci_bus_relations *relations)
{
	struct hv_dr_state *dr;
	struct hv_dr_work *dr_wrk;
	unsigned long dr_size;

	if (hbus->detaching && relations->device_count > 0)
		return;

	dr_size = offsetof(struct hv_dr_state, func) +
	    (sizeof(struct pci_func_desc) * relations->device_count);
	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);

	dr->device_count = relations->device_count;
	if (dr->device_count != 0)
		memcpy(dr->func, relations->func,
		    sizeof(struct pci_func_desc) * dr->device_count);

	mtx_lock(&hbus->device_list_lock);
	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
	mtx_unlock(&hbus->device_list_lock);

	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
	dr_wrk->bus = hbus;
	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
}

/*
 * Task handler for a host-initiated ejection: delete the child (wslot is
 * copied first since hpdev is freed), then acknowledge with
 * PCI_EJECTION_COMPLETE.
 */
static void
hv_eject_device_work(void *arg, int pending __unused)
{
	struct hv_pci_dev *hpdev = arg;
	union win_slot_encoding wslot = hpdev->desc.wslot;
	struct hv_pcibus *hbus = hpdev->hbus;
	struct pci_eject_response *eject_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hv_pci_delete_device(hpdev);

	memset(&ctxt, 0, sizeof(ctxt));
	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	eject_pkt->wslot.val = wslot.val;
	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    eject_pkt, sizeof(*eject_pkt), 0);
}

/* Schedule ejection of a child, unless the bus is already detaching. */
static void
hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct taskqueue *taskq;

	if (hbus->detaching)
		return;

	/*
	 * Push this task into the same taskqueue on which
	 * vmbus_pcib_attach() runs, so we're sure this task can't run
	 * concurrently with vmbus_pcib_attach().
	 */
	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
	taskqueue_enqueue(taskq, &hpdev->eject_task);
}

#define PCIB_PACKET_SIZE	0x100

/*
 * VMBus channel callback: drain the ring buffer, growing the receive buffer
 * on ENOBUFS, and dispatch completion packets (transaction id is a
 * pci_packet pointer) and inband messages (bus relations, ejects).
 */
static void
vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
{
	struct vmbus_pcib_softc *sc = arg;
	struct hv_pcibus *hbus = sc->hbus;

	void *buffer;
	int bufferlen = PCIB_PACKET_SIZE;

	struct pci_packet *comp_packet;
	struct pci_response *response;
	struct pci_incoming_message *new_msg;
	struct pci_bus_relations *bus_rel;
	struct pci_dev_incoming *dev_msg;
	struct hv_pci_dev *hpdev;

	buffer = sc->rx_buf;
	do {
		struct vmbus_chanpkt_hdr *pkt = buffer;
		uint32_t bytes_rxed;
		int ret;

		bytes_rxed = bufferlen;
		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);

		if (ret == ENOBUFS) {
			/* Handle large packet */
			if (bufferlen > PCIB_PACKET_SIZE) {
				free(buffer, M_DEVBUF);
				buffer = NULL;
			}

			/* alloc new buffer */
			buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
			bufferlen = bytes_rxed;

			continue;
		}

		if (ret != 0) {
			/* ignore EIO or EAGAIN */
			break;
		}

		if (bytes_rxed <= sizeof(struct pci_response))
			continue;

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			/*
			 * The transaction id is the pci_packet pointer we
			 * passed to vmbus_chan_send(); its sender is still
			 * blocked in wait_for_completion().
			 */
			comp_packet = (struct pci_packet *)pkt->cph_xactid;
			response = (struct pci_response *)pkt;
			comp_packet->completion_func(comp_packet->compl_ctxt,
			    response, bytes_rxed);
			break;
		case VMBUS_CHANPKT_TYPE_INBAND:
			new_msg = (struct pci_incoming_message *)buffer;

			switch (new_msg->message_type.type) {
			case PCI_BUS_RELATIONS:
				bus_rel = (struct pci_bus_relations *)buffer;

				if (bus_rel->device_count == 0)
					break;

				/* Reject truncated relations payloads. */
				if (bytes_rxed <
				    offsetof(struct pci_bus_relations, func) +
				    (sizeof(struct pci_func_desc) *
				    (bus_rel->device_count)))
					break;

				hv_pci_devices_present(hbus, bus_rel);
				break;

			case PCI_EJECT:
				dev_msg = (struct pci_dev_incoming *)buffer;
				hpdev = get_pcichild_wslot(hbus,
				    dev_msg->wslot.val);

				if (hpdev)
					hv_pci_eject_device(hpdev);

				break;
			default:
				printf("vmbus_pcib: Unknown msg type 0x%x\n",
				    new_msg->message_type.type);
				break;
			}
			break;
		default:
			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
			    pkt->cph_type);
			break;
		}
	} while (1);

	if (bufferlen > PCIB_PACKET_SIZE)
		free(buffer, M_DEVBUF);
}

/*
 * Negotiate the protocol version with the host.  Returns 0 on success,
 * EPROTO if the host rejects the version, or a send error.
 */
static int
hv_pci_protocol_negotiation(struct hv_pcibus *hbus)
{
	struct pci_version_request *version_req;
	struct hv_pci_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_version_request)];
	} ctxt;
	int ret;

	init_completion(&comp_pkt.host_event);

	ctxt.pkt.completion_func = hv_pci_generic_compl;
	ctxt.pkt.compl_ctxt = &comp_pkt;
	version_req = (struct pci_version_request *)&ctxt.pkt.message;
	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
	version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
	version_req->is_last_attempt = 1;

	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req),
	    (uint64_t)&ctxt.pkt);
	if (ret)
		goto out;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib,
		    "vmbus_pcib version negotiation failed: %x\n",
		    comp_pkt.completion_status);
		ret = EPROTO;
	} else {
		ret = 0;
	}
out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/* Ask the host to send along the list of child devices */
static int
hv_pci_query_relations(struct hv_pcibus *hbus)
{
	struct pci_message message;
	int ret;

	message.type = PCI_QUERY_BUS_RELATIONS;
	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    &message, sizeof(message), 0);
	return (ret);
}

/*
 * Move the bus into D0 and report the config-space MMIO base to the host.
 * Returns 0 on success, EPROTO on host failure, or a send error.
 */
static int
hv_pci_enter_d0(struct hv_pcibus *hbus)
{
	struct pci_bus_d0_entry *d0_entry;
	struct hv_pci_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
	} ctxt;
	int ret;

	/*
	 * Tell the host that the bus is ready to use, and moved into the
	 * powered-on state. This includes telling the host which region
	 * of memory-mapped I/O space has been chosen for configuration space
	 * access.
	 */
	init_completion(&comp_pkt.host_event);

	ctxt.pkt.completion_func = hv_pci_generic_compl;
	ctxt.pkt.compl_ctxt = &comp_pkt;

	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
	memset(d0_entry, 0, sizeof(*d0_entry));
	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);

	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
	    (uint64_t)&ctxt.pkt);
	if (ret)
		goto out;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
		ret = EPROTO;
	} else {
		ret = 0;
	}

out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/*
 * It looks this is only needed by Windows VM, but let's send the message too
 * just to make the host happy.
 */
static int
hv_send_resources_allocated(struct hv_pcibus *hbus)
{
	struct pci_resources_assigned *res_assigned;
	struct hv_pci_compl comp_pkt;
	struct hv_pci_dev *hpdev;
	struct pci_packet *pkt;
	uint32_t wslot;
	int ret = 0;

	pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* Walk every possible wslot (8-bit encoding: 0..255). */
	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		init_completion(&comp_pkt.host_event);

		memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
		pkt->completion_func = hv_pci_generic_compl;
		pkt->compl_ctxt = &comp_pkt;

		res_assigned = (struct pci_resources_assigned *)&pkt->message;
		res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
		res_assigned->wslot.val = hpdev->desc.wslot.val;

		ret = vmbus_chan_send(hbus->sc->chan,
		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
		    &pkt->message, sizeof(*res_assigned), (uint64_t)pkt);
		if (ret) {
			free_completion(&comp_pkt.host_event);
			break;
		}

		wait_for_completion(&comp_pkt.host_event);
		free_completion(&comp_pkt.host_event);

		if (comp_pkt.completion_status < 0) {
			ret = EPROTO;
			device_printf(hbus->pcib,
			    "failed to send PCI_RESOURCES_ASSIGNED\n");
			break;
		}
	}

	free(pkt, M_DEVBUF);
	return (ret);
}

/*
 * Tell the host that each child's resources have been released.
 * Fire-and-forget per child; stops at the first send error.
 */
static int
hv_send_resources_released(struct hv_pcibus *hbus)
{
	struct pci_child_message pkt;
	struct hv_pci_dev *hpdev;
	uint32_t wslot;
	int ret;

	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		pkt.message_type.type = PCI_RESOURCES_RELEASED;
		pkt.wslot.val = hpdev->desc.wslot.val;

		ret = vmbus_chan_send(hbus->sc->chan,
		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
		if (ret)
			return (ret);
	}

	return (0);
}

/*
 * Generate hv_cfg_read_{1,2,4}() / hv_cfg_write_{1,2,4}() accessors over
 * the bus's config-space MMIO resource.
 */
#define hv_cfg_read(x, s)						\
static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
    bus_size_t offset)							\
{									\
	return (bus_read_##s(bus->cfg_res, offset));			\
}

#define hv_cfg_write(x, s)						\
static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
    bus_size_t offset, uint##x##_t val)					\
{									\
	return (bus_write_##s(bus->cfg_res, offset, val));		\
}

hv_cfg_read(8, 1)
hv_cfg_read(16, 2)
hv_cfg_read(32, 4)

hv_cfg_write(8, 1)
hv_cfg_write(16, 2)
hv_cfg_write(32, 4)

/*
 * Read `size` bytes at config-space offset `where` for one child.  The ID,
 * ROM BAR, and interrupt-line/pin registers are emulated locally; other
 * offsets go through the shared MMIO window (index page at offset 0, data
 * page at CFG_PAGE_OFFSET), serialized by config_lock.
 */
static void
_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t *val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
	 */
	if (where + size <= PCIR_COMMAND) {
		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCIR_REVID && where + size <=
		   PCIR_CACHELNSZ) {
		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
		       PCIR_REVID, size);
	} else if (where >= PCIR_SUBVEND_0 && where + size <=
		   PCIR_BIOS) {
		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
		       PCIR_SUBVEND_0, size);
	} else if (where >= PCIR_BIOS && where + size <=
		   PCIR_CAP_PTR) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if ((where >= PCIR_INTLINE && where + size <=
		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be read. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start reading.*/
		mb();

		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
			break;
		case 2:
			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
			break;
		default:
			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config read: it's unlikely to reach here. */
		memset(val, 0, size);
	}
}

/*
 * Write `size` bytes at config-space offset `where` for one child through
 * the shared MMIO window.  Read-only ranges (SSIDs, ROM BAR) and anything
 * before PCIR_COMMAND are silently ignored.
 */
static void
_hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/* SSIDs and ROM BARs are read-only */
	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
		return;

	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be written. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start writing.*/
		wmb();

		/* Write to that function's config space. */
		switch (size) {
		case 1:
			hv_cfg_write_1(hbus, addr, (uint8_t)val);
			break;
		case 2:
			hv_cfg_write_2(hbus, addr, (uint16_t)val);
			break;
		default:
			hv_cfg_write_4(hbus, addr, (uint32_t)val);
			break;
		}

		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config write: it's unlikely to reach here. */
		return;
	}
}

/* Task body run on the channel's mgmt taskqueue to raise the detach flag. */
static void
vmbus_pcib_set_detaching(void *arg, int pending __unused)
{
	struct hv_pcibus *hbus = arg;

	atomic_set_int(&hbus->detaching, 1);
}

/*
 * Quiesce before detach: set the detaching flag from within the channel's
 * task context, then drain all pending hot-plug/eject tasks.
 */
static void
vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
{
	struct task task;

	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);

	/*
	 * Make sure the channel callback won't push any possible new
	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
	 */
	vmbus_chan_run_task(hbus->sc->chan, &task);

	taskqueue_drain_all(hbus->sc->taskq);
}


/*
 * Standard probe entry point.
1328 * 1329 */ 1330 static int 1331 vmbus_pcib_probe(device_t dev) 1332 { 1333 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1334 &g_pass_through_dev_type) == 0) { 1335 device_set_desc(dev, "Hyper-V PCI Express Pass Through"); 1336 return (BUS_PROBE_DEFAULT); 1337 } 1338 return (ENXIO); 1339 } 1340 1341 /* 1342 * Standard attach entry point. 1343 * 1344 */ 1345 static int 1346 vmbus_pcib_attach(device_t dev) 1347 { 1348 const int pci_ring_size = (4 * PAGE_SIZE); 1349 const struct hyperv_guid *inst_guid; 1350 struct vmbus_channel *channel; 1351 struct vmbus_pcib_softc *sc; 1352 struct hv_pcibus *hbus; 1353 int rid = 0; 1354 int ret; 1355 1356 hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO); 1357 hbus->pcib = dev; 1358 1359 channel = vmbus_get_channel(dev); 1360 inst_guid = vmbus_chan_guid_inst(channel); 1361 hbus->pci_domain = inst_guid->hv_guid[9] | 1362 (inst_guid->hv_guid[8] << 8); 1363 1364 mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF); 1365 mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF); 1366 TAILQ_INIT(&hbus->children); 1367 TAILQ_INIT(&hbus->dr_list); 1368 1369 hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 1370 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH, 1371 RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE)); 1372 1373 if (!hbus->cfg_res) { 1374 device_printf(dev, "failed to get resource for cfg window\n"); 1375 ret = ENXIO; 1376 goto free_bus; 1377 } 1378 1379 sc = device_get_softc(dev); 1380 sc->chan = channel; 1381 sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); 1382 sc->hbus = hbus; 1383 1384 /* 1385 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT 1386 * messages. NB: we can't handle the messages in the channel callback 1387 * directly, because the message handlers need to send new messages 1388 * to the host and waits for the host's completion messages, which 1389 * must also be handled by the channel callback. 
1390 */ 1391 sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK, 1392 taskqueue_thread_enqueue, &sc->taskq); 1393 taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq"); 1394 1395 hbus->sc = sc; 1396 1397 init_completion(&hbus->query_completion); 1398 hbus->query_comp = &hbus->query_completion; 1399 1400 ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size, 1401 NULL, 0, vmbus_pcib_on_channel_callback, sc); 1402 if (ret) 1403 goto free_res; 1404 1405 ret = hv_pci_protocol_negotiation(hbus); 1406 if (ret) 1407 goto vmbus_close; 1408 1409 ret = hv_pci_query_relations(hbus); 1410 if (ret) 1411 goto vmbus_close; 1412 wait_for_completion(hbus->query_comp); 1413 1414 ret = hv_pci_enter_d0(hbus); 1415 if (ret) 1416 goto vmbus_close; 1417 1418 ret = hv_send_resources_allocated(hbus); 1419 if (ret) 1420 goto vmbus_close; 1421 1422 hbus->pci_bus = device_add_child(dev, "pci", -1); 1423 if (!hbus->pci_bus) { 1424 device_printf(dev, "failed to create pci bus\n"); 1425 ret = ENXIO; 1426 goto vmbus_close; 1427 } 1428 1429 bus_generic_attach(dev); 1430 1431 hbus->state = hv_pcibus_installed; 1432 1433 return (0); 1434 1435 vmbus_close: 1436 vmbus_pcib_pre_detach(hbus); 1437 vmbus_chan_close(sc->chan); 1438 free_res: 1439 taskqueue_free(sc->taskq); 1440 free_completion(&hbus->query_completion); 1441 free(sc->rx_buf, M_DEVBUF); 1442 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1443 free_bus: 1444 mtx_destroy(&hbus->device_list_lock); 1445 mtx_destroy(&hbus->config_lock); 1446 free(hbus, M_DEVBUF); 1447 return (ret); 1448 } 1449 1450 /* 1451 * Standard detach entry point 1452 */ 1453 static int 1454 vmbus_pcib_detach(device_t dev) 1455 { 1456 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1457 struct hv_pcibus *hbus = sc->hbus; 1458 struct pci_message teardown_packet; 1459 struct pci_bus_relations relations; 1460 int ret; 1461 1462 vmbus_pcib_pre_detach(hbus); 1463 1464 if (hbus->state == hv_pcibus_installed) 1465 bus_generic_detach(dev); 
1466 1467 /* Delete any children which might still exist. */ 1468 memset(&relations, 0, sizeof(relations)); 1469 hv_pci_devices_present(hbus, &relations); 1470 1471 ret = hv_send_resources_released(hbus); 1472 if (ret) 1473 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n"); 1474 1475 teardown_packet.type = PCI_BUS_D0EXIT; 1476 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1477 &teardown_packet, sizeof(struct pci_message), 0); 1478 if (ret) 1479 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n"); 1480 1481 taskqueue_drain_all(hbus->sc->taskq); 1482 vmbus_chan_close(sc->chan); 1483 taskqueue_free(sc->taskq); 1484 1485 free_completion(&hbus->query_completion); 1486 free(sc->rx_buf, M_DEVBUF); 1487 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1488 1489 mtx_destroy(&hbus->device_list_lock); 1490 mtx_destroy(&hbus->config_lock); 1491 free(hbus, M_DEVBUF); 1492 1493 return (0); 1494 } 1495 1496 static int 1497 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val) 1498 { 1499 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1500 1501 switch (which) { 1502 case PCIB_IVAR_DOMAIN: 1503 *val = sc->hbus->pci_domain; 1504 return (0); 1505 1506 case PCIB_IVAR_BUS: 1507 /* There is only bus 0. 
*/ 1508 *val = 0; 1509 return (0); 1510 } 1511 return (ENOENT); 1512 } 1513 1514 static int 1515 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val) 1516 { 1517 return (ENOENT); 1518 } 1519 1520 static struct resource * 1521 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, 1522 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) 1523 { 1524 unsigned int bar_no; 1525 struct hv_pci_dev *hpdev; 1526 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1527 struct resource *res; 1528 unsigned int devfn; 1529 1530 if (type == PCI_RES_BUS) 1531 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid, 1532 start, end, count, flags)); 1533 1534 /* Devices with port I/O BAR are not supported. */ 1535 if (type == SYS_RES_IOPORT) 1536 return (NULL); 1537 1538 if (type == SYS_RES_MEMORY) { 1539 devfn = PCI_DEVFN(pci_get_slot(child), 1540 pci_get_function(child)); 1541 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1542 if (!hpdev) 1543 return (NULL); 1544 1545 bar_no = PCI_RID2BAR(*rid); 1546 if (bar_no >= MAX_NUM_BARS) 1547 return (NULL); 1548 1549 /* Make sure a 32-bit BAR gets a 32-bit address */ 1550 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64)) 1551 end = ulmin(end, 0xFFFFFFFF); 1552 } 1553 1554 res = bus_generic_alloc_resource(dev, child, type, rid, 1555 start, end, count, flags); 1556 /* 1557 * If this is a request for a specific range, assume it is 1558 * correct and pass it up to the parent. 
1559 */ 1560 if (res == NULL && start + count - 1 == end) 1561 res = bus_generic_alloc_resource(dev, child, type, rid, 1562 start, end, count, flags); 1563 return (res); 1564 } 1565 1566 static int 1567 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid, 1568 struct resource *r) 1569 { 1570 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1571 1572 if (type == PCI_RES_BUS) 1573 return (pci_domain_release_bus(sc->hbus->pci_domain, child, 1574 rid, r)); 1575 1576 if (type == SYS_RES_IOPORT) 1577 return (EINVAL); 1578 1579 return (bus_generic_release_resource(dev, child, type, rid, r)); 1580 } 1581 1582 #if __FreeBSD_version >= 1100000 1583 static int 1584 vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op, 1585 size_t setsize, cpuset_t *cpuset) 1586 { 1587 return (bus_get_cpus(pcib, op, setsize, cpuset)); 1588 } 1589 #endif 1590 1591 static uint32_t 1592 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, 1593 u_int reg, int bytes) 1594 { 1595 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1596 struct hv_pci_dev *hpdev; 1597 unsigned int devfn = PCI_DEVFN(slot, func); 1598 uint32_t data = 0; 1599 1600 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1601 1602 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1603 if (!hpdev) 1604 return (~0); 1605 1606 _hv_pcifront_read_config(hpdev, reg, bytes, &data); 1607 1608 return (data); 1609 } 1610 1611 static void 1612 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, 1613 u_int reg, uint32_t data, int bytes) 1614 { 1615 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1616 struct hv_pci_dev *hpdev; 1617 unsigned int devfn = PCI_DEVFN(slot, func); 1618 1619 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1620 1621 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1622 if (!hpdev) 1623 return; 1624 1625 _hv_pcifront_write_config(hpdev, reg, bytes, data); 1626 } 1627 1628 static int 1629 
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin) 1630 { 1631 /* We only support MSI/MSI-X and don't support INTx interrupt. */ 1632 return (PCI_INVALID_IRQ); 1633 } 1634 1635 static int 1636 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count, 1637 int maxcount, int *irqs) 1638 { 1639 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount, 1640 irqs)); 1641 } 1642 1643 static int 1644 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) 1645 { 1646 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs)); 1647 } 1648 1649 static int 1650 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) 1651 { 1652 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq)); 1653 } 1654 1655 static int 1656 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq) 1657 { 1658 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq)); 1659 } 1660 1661 #define MSI_INTEL_ADDR_DEST 0x000ff000 1662 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. 
*/ 1663 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED 1664 1665 static int 1666 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq, 1667 uint64_t *addr, uint32_t *data) 1668 { 1669 unsigned int devfn; 1670 struct hv_pci_dev *hpdev; 1671 1672 uint64_t v_addr; 1673 uint32_t v_data; 1674 struct hv_irq_desc *hid, *tmp_hid; 1675 unsigned int cpu, vcpu_id; 1676 unsigned int vector; 1677 1678 struct vmbus_pcib_softc *sc = device_get_softc(pcib); 1679 struct pci_create_interrupt *int_pkt; 1680 struct compose_comp_ctxt comp; 1681 struct { 1682 struct pci_packet pkt; 1683 uint8_t buffer[sizeof(struct pci_create_interrupt)]; 1684 } ctxt; 1685 1686 int ret; 1687 1688 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); 1689 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1690 if (!hpdev) 1691 return (ENOENT); 1692 1693 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq, 1694 &v_addr, &v_data); 1695 if (ret) 1696 return (ret); 1697 1698 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) { 1699 if (hid->irq == irq) { 1700 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link); 1701 hv_int_desc_free(hpdev, hid); 1702 break; 1703 } 1704 } 1705 1706 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12; 1707 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); 1708 vector = v_data & MSI_INTEL_DATA_INTVEC; 1709 1710 init_completion(&comp.comp_pkt.host_event); 1711 1712 memset(&ctxt, 0, sizeof(ctxt)); 1713 ctxt.pkt.completion_func = hv_pci_compose_compl; 1714 ctxt.pkt.compl_ctxt = ∁ 1715 1716 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message; 1717 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; 1718 int_pkt->wslot.val = hpdev->desc.wslot.val; 1719 int_pkt->int_desc.vector = vector; 1720 int_pkt->int_desc.vector_count = 1; 1721 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; 1722 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id; 1723 1724 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1725 
VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt), 1726 (uint64_t)&ctxt.pkt); 1727 if (ret) { 1728 free_completion(&comp.comp_pkt.host_event); 1729 return (ret); 1730 } 1731 1732 wait_for_completion(&comp.comp_pkt.host_event); 1733 free_completion(&comp.comp_pkt.host_event); 1734 1735 if (comp.comp_pkt.completion_status < 0) 1736 return (EPROTO); 1737 1738 *addr = comp.int_desc.address; 1739 *data = comp.int_desc.data; 1740 1741 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO); 1742 hid->irq = irq; 1743 hid->desc = comp.int_desc; 1744 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link); 1745 1746 return (0); 1747 } 1748 1749 static device_method_t vmbus_pcib_methods[] = { 1750 /* Device interface */ 1751 DEVMETHOD(device_probe, vmbus_pcib_probe), 1752 DEVMETHOD(device_attach, vmbus_pcib_attach), 1753 DEVMETHOD(device_detach, vmbus_pcib_detach), 1754 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1755 DEVMETHOD(device_suspend, bus_generic_suspend), 1756 DEVMETHOD(device_resume, bus_generic_resume), 1757 1758 /* Bus interface */ 1759 DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar), 1760 DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar), 1761 DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource), 1762 DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource), 1763 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), 1764 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), 1765 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), 1766 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), 1767 #if __FreeBSD_version >= 1100000 1768 DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus), 1769 #endif 1770 1771 /* pcib interface */ 1772 DEVMETHOD(pcib_maxslots, pcib_maxslots), 1773 DEVMETHOD(pcib_read_config, vmbus_pcib_read_config), 1774 DEVMETHOD(pcib_write_config, vmbus_pcib_write_config), 1775 DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr), 1776 DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi), 1777 
DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi), 1778 DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix), 1779 DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix), 1780 DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi), 1781 1782 DEVMETHOD_END 1783 }; 1784 1785 static devclass_t pcib_devclass; 1786 1787 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods, 1788 sizeof(struct vmbus_pcib_softc)); 1789 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0); 1790 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1); 1791 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1); 1792