1 /*- 2 * Copyright (c) 2016 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #ifdef NEW_PCIB 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/types.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/kernel.h> 38 #include <sys/queue.h> 39 #include <sys/lock.h> 40 #include <sys/sx.h> 41 #include <sys/smp.h> 42 #include <sys/sysctl.h> 43 #include <sys/bus.h> 44 #include <sys/rman.h> 45 #include <sys/mutex.h> 46 #include <sys/errno.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_param.h> 50 #include <vm/vm_kern.h> 51 #include <vm/pmap.h> 52 53 #include <machine/atomic.h> 54 #include <machine/bus.h> 55 #include <machine/frame.h> 56 #include <machine/pci_cfgreg.h> 57 #include <machine/resource.h> 58 59 #include <sys/pciio.h> 60 #include <dev/pci/pcireg.h> 61 #include <dev/pci/pcivar.h> 62 #include <dev/pci/pci_private.h> 63 #include <dev/pci/pcib_private.h> 64 #include "pcib_if.h" 65 66 #include <machine/intr_machdep.h> 67 #include <x86/apicreg.h> 68 69 #include <dev/hyperv/include/hyperv.h> 70 #include <dev/hyperv/include/hyperv_busdma.h> 71 #include <dev/hyperv/include/vmbus_xact.h> 72 #include <dev/hyperv/vmbus/vmbus_reg.h> 73 #include <dev/hyperv/vmbus/vmbus_chanvar.h> 74 75 #include "vmbus_if.h" 76 77 #if __FreeBSD_version < 1100000 78 typedef u_long rman_res_t; 79 #define RM_MAX_END (~(rman_res_t)0) 80 #endif 81 82 struct completion { 83 unsigned int done; 84 struct mtx lock; 85 }; 86 87 static void 88 init_completion(struct completion *c) 89 { 90 memset(c, 0, sizeof(*c)); 91 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF); 92 c->done = 0; 93 } 94 95 static void 96 free_completion(struct completion *c) 97 { 98 mtx_destroy(&c->lock); 99 } 100 101 static void 102 complete(struct completion *c) 103 { 104 mtx_lock(&c->lock); 105 c->done++; 106 mtx_unlock(&c->lock); 107 wakeup(c); 108 } 109 110 static void 111 wait_for_completion(struct completion *c) 112 { 113 mtx_lock(&c->lock); 114 while (c->done == 0) 115 mtx_sleep(c, &c->lock, 0, 
"hvwfc", 0); 116 c->done--; 117 mtx_unlock(&c->lock); 118 } 119 120 #define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (major))) 121 122 enum { 123 PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), 124 PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 125 }; 126 127 #define PCI_CONFIG_MMIO_LENGTH 0x2000 128 #define CFG_PAGE_OFFSET 0x1000 129 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) 130 131 /* 132 * Message Types 133 */ 134 135 enum pci_message_type { 136 /* 137 * Version 1.1 138 */ 139 PCI_MESSAGE_BASE = 0x42490000, 140 PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0, 141 PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1, 142 PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4, 143 PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5, 144 PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6, 145 PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7, 146 PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8, 147 PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9, 148 PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA, 149 PCI_EJECT = PCI_MESSAGE_BASE + 0xB, 150 PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC, 151 PCI_REENABLE = PCI_MESSAGE_BASE + 0xD, 152 PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE, 153 PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF, 154 PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10, 155 PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11, 156 PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12, 157 PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13, 158 PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14, 159 PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15, 160 PCI_MESSAGE_MAXIMUM 161 }; 162 163 /* 164 * Structures defining the virtual PCI Express protocol. 
 */

/* Protocol version, accessible as one 32-bit value or as two 16-bit parts. */
union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;

/*
 * This representation is the one used in Windows, which is
 * what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 */
union win_slot_encoding {
	struct {
		uint32_t	slot:5;
		uint32_t	func:3;
		uint32_t	reserved:24;
	} bits;
	uint32_t val;
} __packed;

/* Description of one PCI function, as listed in a bus-relations message. */
struct pci_func_desc {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
} __packed;

/* MSI descriptor carried in struct pci_create_interrupt. */
struct hv_msi_desc {
	uint8_t		vector;
	uint8_t		delivery_mode;
	uint16_t	vector_count;
	uint32_t	reserved;
	uint64_t	cpu_mask;
} __packed;

/*
 * Interrupt descriptor carried in struct pci_create_int_response and
 * struct pci_delete_interrupt, and kept per interrupt in struct hv_irq_desc.
 */
struct tran_int_desc {
	uint16_t	reserved;
	uint16_t	vector_count;
	uint32_t	data;
	uint64_t	address;
} __packed;

/* Common first member of every message: one of enum pci_message_type. */
struct pci_message {
	uint32_t type;
} __packed;

/* Message that addresses a single child function by its wslot. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

/* Layout of a received in-band message: channel header, then the type. */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

/* Layout of a received completion: channel header, then the status. */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * Bookkeeping placed in front of an outgoing message.  The address of the
 * pci_packet is passed to vmbus_chan_send() as the transaction id; the
 * channel callback casts the id of a completion packet back to a
 * pci_packet and calls completion_func(compl_ctxt, response, size).
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	/* The message itself starts here (trailing storage follows). */
	struct pci_message message[0];
};

/*
 * Specific message types supporting the PCI protocol.
 */

/* Payload of PCI_QUERY_PROTOCOL_VERSION. */
struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t is_last_attempt:1;
	uint32_t reservedz:31;
} __packed;

/* Payload of PCI_BUS_D0ENTRY; mmio_base is the start of the config window. */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;
} __packed;

/* Incoming PCI_BUS_RELATIONS message: device_count entries follow in func[]. */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
/* Completion of PCI_QUERY_RESOURCE_REQUIREMENTS: one probed value per BAR. */
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

/* Payload of PCI_RESOURCES_ASSIGNED for the function identified by wslot. */
struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

/* Interrupt creation request for the function identified by wslot. */
struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

/* Completion of an interrupt creation request. */
struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

/* Payload of PCI_DELETE_INTERRUPT_MESSAGE. */
struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

/* Incoming message that names a child function (used for PCI_EJECT). */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

/* Payload of PCI_EJECTION_COMPLETE. */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;

/*
 * Driver specific state.
 */

/*
 * Bus life-cycle state.  The devices-present task only rescans the PCI
 * bus once the state is hv_pcibus_installed.
 */
enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-bridge state. */
struct hv_pcibus {
	device_t pcib;
	device_t pci_bus;
	struct vmbus_pcib_softc *sc;

	uint16_t pci_domain;

	enum hv_pcibus_state state;

	/* Resource backing the configuration-space MMIO window. */
	struct resource *cfg_res;

	/*
	 * query_comp, when non-NULL, is completed (and reset to NULL) by the
	 * devices-present task after it has processed a relations message.
	 */
	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock;    /* Protect lists below */
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	/* Non-zero once detach has begun; new relations/eject work is refused. */
	volatile int detaching;
};

/* Per-function state; linked on hv_pcibus.children. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct pci_func_desc desc;

	/* Set while a relations message is processed; cleared if re-reported. */
	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be
 * processed in order.
362 */ 363 struct hv_dr_work { 364 struct task task; 365 struct hv_pcibus *bus; 366 }; 367 368 struct hv_dr_state { 369 TAILQ_ENTRY(hv_dr_state) link; 370 uint32_t device_count; 371 struct pci_func_desc func[0]; 372 }; 373 374 struct hv_irq_desc { 375 TAILQ_ENTRY(hv_irq_desc) link; 376 struct tran_int_desc desc; 377 int irq; 378 }; 379 380 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) 381 #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) 382 #define PCI_FUNC(devfn) ((devfn) & 0x07) 383 384 static uint32_t 385 devfn_to_wslot(unsigned int devfn) 386 { 387 union win_slot_encoding wslot; 388 389 wslot.val = 0; 390 wslot.bits.slot = PCI_SLOT(devfn); 391 wslot.bits.func = PCI_FUNC(devfn); 392 393 return (wslot.val); 394 } 395 396 static unsigned int 397 wslot_to_devfn(uint32_t wslot) 398 { 399 union win_slot_encoding encoding; 400 unsigned int slot; 401 unsigned int func; 402 403 encoding.val = wslot; 404 405 slot = encoding.bits.slot; 406 func = encoding.bits.func; 407 408 return (PCI_DEVFN(slot, func)); 409 } 410 411 struct vmbus_pcib_softc { 412 struct vmbus_channel *chan; 413 void *rx_buf; 414 415 struct taskqueue *taskq; 416 417 struct hv_pcibus *hbus; 418 }; 419 420 /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */ 421 static const struct hyperv_guid g_pass_through_dev_type = { 422 .hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, 423 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F} 424 }; 425 426 struct hv_pci_compl { 427 struct completion host_event; 428 int32_t completion_status; 429 }; 430 431 struct q_res_req_compl { 432 struct completion host_event; 433 struct hv_pci_dev *hpdev; 434 }; 435 436 struct compose_comp_ctxt { 437 struct hv_pci_compl comp_pkt; 438 struct tran_int_desc int_desc; 439 }; 440 441 static void 442 hv_pci_generic_compl(void *context, struct pci_response *resp, 443 int resp_packet_size) 444 { 445 struct hv_pci_compl *comp_pkt = context; 446 447 if (resp_packet_size >= sizeof(struct pci_response)) 448 
comp_pkt->completion_status = resp->status; 449 else 450 comp_pkt->completion_status = -1; 451 452 complete(&comp_pkt->host_event); 453 } 454 455 static void 456 q_resource_requirements(void *context, struct pci_response *resp, 457 int resp_packet_size) 458 { 459 struct q_res_req_compl *completion = context; 460 struct pci_q_res_req_response *q_res_req = 461 (struct pci_q_res_req_response *)resp; 462 int i; 463 464 if (resp->status < 0) { 465 printf("vmbus_pcib: failed to query resource requirements\n"); 466 } else { 467 for (i = 0; i < MAX_NUM_BARS; i++) 468 completion->hpdev->probed_bar[i] = 469 q_res_req->probed_bar[i]; 470 } 471 472 complete(&completion->host_event); 473 } 474 475 static void 476 hv_pci_compose_compl(void *context, struct pci_response *resp, 477 int resp_packet_size) 478 { 479 struct compose_comp_ctxt *comp_pkt = context; 480 struct pci_create_int_response *int_resp = 481 (struct pci_create_int_response *)resp; 482 483 comp_pkt->comp_pkt.completion_status = resp->status; 484 comp_pkt->int_desc = int_resp->int_desc; 485 complete(&comp_pkt->comp_pkt.host_event); 486 } 487 488 static void 489 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid) 490 { 491 struct pci_delete_interrupt *int_pkt; 492 struct { 493 struct pci_packet pkt; 494 uint8_t buffer[sizeof(struct pci_delete_interrupt)]; 495 } ctxt; 496 497 memset(&ctxt, 0, sizeof(ctxt)); 498 int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; 499 int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE; 500 int_pkt->wslot.val = hpdev->desc.wslot.val; 501 int_pkt->int_desc = hid->desc; 502 503 vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 504 int_pkt, sizeof(*int_pkt), 0); 505 506 free(hid, M_DEVBUF); 507 } 508 509 static void 510 hv_pci_delete_device(struct hv_pci_dev *hpdev) 511 { 512 struct hv_pcibus *hbus = hpdev->hbus; 513 struct hv_irq_desc *hid, *tmp_hid; 514 device_t pci_dev; 515 int devfn; 516 517 devfn = wslot_to_devfn(hpdev->desc.wslot.val); 
518 519 mtx_lock(&Giant); 520 521 pci_dev = pci_find_dbsf(hbus->pci_domain, 522 0, PCI_SLOT(devfn), PCI_FUNC(devfn)); 523 if (pci_dev) 524 device_delete_child(hbus->pci_bus, pci_dev); 525 526 mtx_unlock(&Giant); 527 528 mtx_lock(&hbus->device_list_lock); 529 TAILQ_REMOVE(&hbus->children, hpdev, link); 530 mtx_unlock(&hbus->device_list_lock); 531 532 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) 533 hv_int_desc_free(hpdev, hid); 534 535 free(hpdev, M_DEVBUF); 536 } 537 538 static struct hv_pci_dev * 539 new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc) 540 { 541 struct hv_pci_dev *hpdev; 542 struct pci_child_message *res_req; 543 struct q_res_req_compl comp_pkt; 544 struct { 545 struct pci_packet pkt; 546 uint8_t buffer[sizeof(struct pci_child_message)]; 547 } ctxt; 548 int ret; 549 550 hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO); 551 hpdev->hbus = hbus; 552 553 TAILQ_INIT(&hpdev->irq_desc_list); 554 555 init_completion(&comp_pkt.host_event); 556 comp_pkt.hpdev = hpdev; 557 558 ctxt.pkt.compl_ctxt = &comp_pkt; 559 ctxt.pkt.completion_func = q_resource_requirements; 560 561 res_req = (struct pci_child_message *)&ctxt.pkt.message; 562 res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS; 563 res_req->wslot.val = desc->wslot.val; 564 565 ret = vmbus_chan_send(hbus->sc->chan, 566 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 567 res_req, sizeof(*res_req), (uint64_t)&ctxt.pkt); 568 if (ret) 569 goto err; 570 571 wait_for_completion(&comp_pkt.host_event); 572 free_completion(&comp_pkt.host_event); 573 574 hpdev->desc = *desc; 575 576 mtx_lock(&hbus->device_list_lock); 577 TAILQ_INSERT_TAIL(&hbus->children, hpdev, link); 578 mtx_unlock(&hbus->device_list_lock); 579 return (hpdev); 580 err: 581 free_completion(&comp_pkt.host_event); 582 free(hpdev, M_DEVBUF); 583 return (NULL); 584 } 585 586 #if __FreeBSD_version < 1100000 587 588 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. 
*/ 589 590 static struct pci_devinfo * 591 pci_identify_function(device_t pcib, device_t dev, int domain, int busno, 592 int slot, int func, size_t dinfo_size) 593 { 594 struct pci_devinfo *dinfo; 595 596 dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size); 597 if (dinfo != NULL) 598 pci_add_child(dev, dinfo); 599 600 return (dinfo); 601 } 602 603 static int 604 pci_rescan(device_t dev) 605 { 606 #define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w) 607 device_t pcib = device_get_parent(dev); 608 struct pci_softc *sc; 609 device_t child, *devlist, *unchanged; 610 int devcount, error, i, j, maxslots, oldcount; 611 int busno, domain, s, f, pcifunchigh; 612 uint8_t hdrtype; 613 614 /* No need to check for ARI on a rescan. */ 615 error = device_get_children(dev, &devlist, &devcount); 616 if (error) 617 return (error); 618 if (devcount != 0) { 619 unchanged = malloc(devcount * sizeof(device_t), M_TEMP, 620 M_NOWAIT | M_ZERO); 621 if (unchanged == NULL) { 622 free(devlist, M_TEMP); 623 return (ENOMEM); 624 } 625 } else 626 unchanged = NULL; 627 628 sc = device_get_softc(dev); 629 domain = pcib_get_domain(dev); 630 busno = pcib_get_bus(dev); 631 maxslots = PCIB_MAXSLOTS(pcib); 632 for (s = 0; s <= maxslots; s++) { 633 /* If function 0 is not present, skip to the next slot. */ 634 f = 0; 635 if (REG(PCIR_VENDOR, 2) == 0xffff) 636 continue; 637 pcifunchigh = 0; 638 hdrtype = REG(PCIR_HDRTYPE, 1); 639 if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) 640 continue; 641 if (hdrtype & PCIM_MFDEV) 642 pcifunchigh = PCIB_MAXFUNCS(pcib); 643 for (f = 0; f <= pcifunchigh; f++) { 644 if (REG(PCIR_VENDOR, 2) == 0xffff) 645 continue; 646 647 /* 648 * Found a valid function. Check if a 649 * device_t for this device already exists. 
650 */ 651 for (i = 0; i < devcount; i++) { 652 child = devlist[i]; 653 if (child == NULL) 654 continue; 655 if (pci_get_slot(child) == s && 656 pci_get_function(child) == f) { 657 unchanged[i] = child; 658 goto next_func; 659 } 660 } 661 662 pci_identify_function(pcib, dev, domain, busno, s, f, 663 sizeof(struct pci_devinfo)); 664 next_func:; 665 } 666 } 667 668 /* Remove devices that are no longer present. */ 669 for (i = 0; i < devcount; i++) { 670 if (unchanged[i] != NULL) 671 continue; 672 device_delete_child(dev, devlist[i]); 673 } 674 675 free(devlist, M_TEMP); 676 oldcount = devcount; 677 678 /* Try to attach the devices just added. */ 679 error = device_get_children(dev, &devlist, &devcount); 680 if (error) { 681 free(unchanged, M_TEMP); 682 return (error); 683 } 684 685 for (i = 0; i < devcount; i++) { 686 for (j = 0; j < oldcount; j++) { 687 if (devlist[i] == unchanged[j]) 688 goto next_device; 689 } 690 691 device_probe_and_attach(devlist[i]); 692 next_device:; 693 } 694 695 free(unchanged, M_TEMP); 696 free(devlist, M_TEMP); 697 return (0); 698 #undef REG 699 } 700 701 #else 702 703 static int 704 pci_rescan(device_t dev) 705 { 706 return (BUS_RESCAN(dev)); 707 } 708 709 #endif 710 711 static void 712 pci_devices_present_work(void *arg, int pending __unused) 713 { 714 struct hv_dr_work *dr_wrk = arg; 715 struct hv_dr_state *dr = NULL; 716 struct hv_pcibus *hbus; 717 uint32_t child_no; 718 bool found; 719 struct pci_func_desc *new_desc; 720 struct hv_pci_dev *hpdev, *tmp_hpdev; 721 struct completion *query_comp; 722 bool need_rescan = false; 723 724 hbus = dr_wrk->bus; 725 free(dr_wrk, M_DEVBUF); 726 727 /* Pull this off the queue and process it if it was the last one. */ 728 mtx_lock(&hbus->device_list_lock); 729 while (!TAILQ_EMPTY(&hbus->dr_list)) { 730 dr = TAILQ_FIRST(&hbus->dr_list); 731 TAILQ_REMOVE(&hbus->dr_list, dr, link); 732 733 /* Throw this away if the list still has stuff in it. 
*/ 734 if (!TAILQ_EMPTY(&hbus->dr_list)) { 735 free(dr, M_DEVBUF); 736 continue; 737 } 738 } 739 mtx_unlock(&hbus->device_list_lock); 740 741 if (!dr) 742 return; 743 744 /* First, mark all existing children as reported missing. */ 745 mtx_lock(&hbus->device_list_lock); 746 TAILQ_FOREACH(hpdev, &hbus->children, link) 747 hpdev->reported_missing = true; 748 mtx_unlock(&hbus->device_list_lock); 749 750 /* Next, add back any reported devices. */ 751 for (child_no = 0; child_no < dr->device_count; child_no++) { 752 found = false; 753 new_desc = &dr->func[child_no]; 754 755 mtx_lock(&hbus->device_list_lock); 756 TAILQ_FOREACH(hpdev, &hbus->children, link) { 757 if ((hpdev->desc.wslot.val == 758 new_desc->wslot.val) && 759 (hpdev->desc.v_id == new_desc->v_id) && 760 (hpdev->desc.d_id == new_desc->d_id) && 761 (hpdev->desc.ser == new_desc->ser)) { 762 hpdev->reported_missing = false; 763 found = true; 764 break; 765 } 766 } 767 mtx_unlock(&hbus->device_list_lock); 768 769 if (!found) { 770 if (!need_rescan) 771 need_rescan = true; 772 773 hpdev = new_pcichild_device(hbus, new_desc); 774 if (!hpdev) 775 printf("vmbus_pcib: failed to add a child\n"); 776 } 777 } 778 779 /* Remove missing device(s), if any */ 780 TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) { 781 if (hpdev->reported_missing) 782 hv_pci_delete_device(hpdev); 783 } 784 785 /* Rescan the bus to find any new device, if necessary. */ 786 if (hbus->state == hv_pcibus_installed && need_rescan) 787 pci_rescan(hbus->pci_bus); 788 789 /* Wake up hv_pci_query_relations(), if it's waiting. 
*/ 790 query_comp = hbus->query_comp; 791 if (query_comp) { 792 hbus->query_comp = NULL; 793 complete(query_comp); 794 } 795 796 free(dr, M_DEVBUF); 797 } 798 799 static struct hv_pci_dev * 800 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot) 801 { 802 struct hv_pci_dev *hpdev, *ret = NULL; 803 804 mtx_lock(&hbus->device_list_lock); 805 TAILQ_FOREACH(hpdev, &hbus->children, link) { 806 if (hpdev->desc.wslot.val == wslot) { 807 ret = hpdev; 808 break; 809 } 810 } 811 mtx_unlock(&hbus->device_list_lock); 812 813 return (ret); 814 } 815 816 static void 817 hv_pci_devices_present(struct hv_pcibus *hbus, 818 struct pci_bus_relations *relations) 819 { 820 struct hv_dr_state *dr; 821 struct hv_dr_work *dr_wrk; 822 unsigned long dr_size; 823 824 if (hbus->detaching && relations->device_count > 0) 825 return; 826 827 dr_size = offsetof(struct hv_dr_state, func) + 828 (sizeof(struct pci_func_desc) * relations->device_count); 829 dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO); 830 831 dr->device_count = relations->device_count; 832 if (dr->device_count != 0) 833 memcpy(dr->func, relations->func, 834 sizeof(struct pci_func_desc) * dr->device_count); 835 836 mtx_lock(&hbus->device_list_lock); 837 TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link); 838 mtx_unlock(&hbus->device_list_lock); 839 840 dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO); 841 dr_wrk->bus = hbus; 842 TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk); 843 taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task); 844 } 845 846 static void 847 hv_eject_device_work(void *arg, int pending __unused) 848 { 849 struct hv_pci_dev *hpdev = arg; 850 union win_slot_encoding wslot = hpdev->desc.wslot; 851 struct hv_pcibus *hbus = hpdev->hbus; 852 struct pci_eject_response *eject_pkt; 853 struct { 854 struct pci_packet pkt; 855 uint8_t buffer[sizeof(struct pci_eject_response)]; 856 } ctxt; 857 858 hv_pci_delete_device(hpdev); 859 860 memset(&ctxt, 0, sizeof(ctxt)); 861 eject_pkt = (struct 
pci_eject_response *)&ctxt.pkt.message; 862 eject_pkt->message_type.type = PCI_EJECTION_COMPLETE; 863 eject_pkt->wslot.val = wslot.val; 864 vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 865 eject_pkt, sizeof(*eject_pkt), 0); 866 } 867 868 static void 869 hv_pci_eject_device(struct hv_pci_dev *hpdev) 870 { 871 struct hv_pcibus *hbus = hpdev->hbus; 872 struct taskqueue *taskq; 873 874 if (hbus->detaching) 875 return; 876 877 /* 878 * Push this task into the same taskqueue on which 879 * vmbus_pcib_attach() runs, so we're sure this task can't run 880 * concurrently with vmbus_pcib_attach(). 881 */ 882 TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev); 883 taskq = vmbus_chan_mgmt_tq(hbus->sc->chan); 884 taskqueue_enqueue(taskq, &hpdev->eject_task); 885 } 886 887 #define PCIB_PACKET_SIZE 0x100 888 889 static void 890 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg) 891 { 892 struct vmbus_pcib_softc *sc = arg; 893 struct hv_pcibus *hbus = sc->hbus; 894 895 void *buffer; 896 int bufferlen = PCIB_PACKET_SIZE; 897 898 struct pci_packet *comp_packet; 899 struct pci_response *response; 900 struct pci_incoming_message *new_msg; 901 struct pci_bus_relations *bus_rel; 902 struct pci_dev_incoming *dev_msg; 903 struct hv_pci_dev *hpdev; 904 905 buffer = sc->rx_buf; 906 do { 907 struct vmbus_chanpkt_hdr *pkt = buffer; 908 uint32_t bytes_rxed; 909 int ret; 910 911 bytes_rxed = bufferlen; 912 ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed); 913 914 if (ret == ENOBUFS) { 915 /* Handle large packet */ 916 if (bufferlen > PCIB_PACKET_SIZE) { 917 free(buffer, M_DEVBUF); 918 buffer = NULL; 919 } 920 921 /* alloc new buffer */ 922 buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO); 923 bufferlen = bytes_rxed; 924 925 continue; 926 } 927 928 if (ret != 0) { 929 /* ignore EIO or EAGAIN */ 930 break; 931 } 932 933 if (bytes_rxed <= sizeof(struct pci_response)) 934 continue; 935 936 switch (pkt->cph_type) { 937 case 
VMBUS_CHANPKT_TYPE_COMP: 938 comp_packet = (struct pci_packet *)pkt->cph_xactid; 939 response = (struct pci_response *)pkt; 940 comp_packet->completion_func(comp_packet->compl_ctxt, 941 response, bytes_rxed); 942 break; 943 case VMBUS_CHANPKT_TYPE_INBAND: 944 new_msg = (struct pci_incoming_message *)buffer; 945 946 switch (new_msg->message_type.type) { 947 case PCI_BUS_RELATIONS: 948 bus_rel = (struct pci_bus_relations *)buffer; 949 950 if (bus_rel->device_count == 0) 951 break; 952 953 if (bytes_rxed < 954 offsetof(struct pci_bus_relations, func) + 955 (sizeof(struct pci_func_desc) * 956 (bus_rel->device_count))) 957 break; 958 959 hv_pci_devices_present(hbus, bus_rel); 960 break; 961 962 case PCI_EJECT: 963 dev_msg = (struct pci_dev_incoming *)buffer; 964 hpdev = get_pcichild_wslot(hbus, 965 dev_msg->wslot.val); 966 967 if (hpdev) 968 hv_pci_eject_device(hpdev); 969 970 break; 971 default: 972 printf("vmbus_pcib: Unknown msg type 0x%x\n", 973 new_msg->message_type.type); 974 break; 975 } 976 break; 977 default: 978 printf("vmbus_pcib: Unknown VMBus msg type %hd\n", 979 pkt->cph_type); 980 break; 981 } 982 } while (1); 983 984 if (bufferlen > PCIB_PACKET_SIZE) 985 free(buffer, M_DEVBUF); 986 } 987 988 static int 989 hv_pci_protocol_negotiation(struct hv_pcibus *hbus) 990 { 991 struct pci_version_request *version_req; 992 struct hv_pci_compl comp_pkt; 993 struct { 994 struct pci_packet pkt; 995 uint8_t buffer[sizeof(struct pci_version_request)]; 996 } ctxt; 997 int ret; 998 999 init_completion(&comp_pkt.host_event); 1000 1001 ctxt.pkt.completion_func = hv_pci_generic_compl; 1002 ctxt.pkt.compl_ctxt = &comp_pkt; 1003 version_req = (struct pci_version_request *)&ctxt.pkt.message; 1004 version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; 1005 version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT; 1006 version_req->is_last_attempt = 1; 1007 1008 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1009 VMBUS_CHANPKT_FLAG_RC, version_req, 
sizeof(*version_req), 1010 (uint64_t)&ctxt.pkt); 1011 if (ret) 1012 goto out; 1013 1014 wait_for_completion(&comp_pkt.host_event); 1015 1016 if (comp_pkt.completion_status < 0) { 1017 device_printf(hbus->pcib, 1018 "vmbus_pcib version negotiation failed: %x\n", 1019 comp_pkt.completion_status); 1020 ret = EPROTO; 1021 } else { 1022 ret = 0; 1023 } 1024 out: 1025 free_completion(&comp_pkt.host_event); 1026 return (ret); 1027 } 1028 1029 /* Ask the host to send along the list of child devices */ 1030 static int 1031 hv_pci_query_relations(struct hv_pcibus *hbus) 1032 { 1033 struct pci_message message; 1034 int ret; 1035 1036 message.type = PCI_QUERY_BUS_RELATIONS; 1037 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1038 &message, sizeof(message), 0); 1039 return (ret); 1040 } 1041 1042 static int 1043 hv_pci_enter_d0(struct hv_pcibus *hbus) 1044 { 1045 struct pci_bus_d0_entry *d0_entry; 1046 struct hv_pci_compl comp_pkt; 1047 struct { 1048 struct pci_packet pkt; 1049 uint8_t buffer[sizeof(struct pci_bus_d0_entry)]; 1050 } ctxt; 1051 int ret; 1052 1053 /* 1054 * Tell the host that the bus is ready to use, and moved into the 1055 * powered-on state. This includes telling the host which region 1056 * of memory-mapped I/O space has been chosen for configuration space 1057 * access. 
1058 */ 1059 init_completion(&comp_pkt.host_event); 1060 1061 ctxt.pkt.completion_func = hv_pci_generic_compl; 1062 ctxt.pkt.compl_ctxt = &comp_pkt; 1063 1064 d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message; 1065 memset(d0_entry, 0, sizeof(*d0_entry)); 1066 d0_entry->message_type.type = PCI_BUS_D0ENTRY; 1067 d0_entry->mmio_base = rman_get_start(hbus->cfg_res); 1068 1069 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1070 VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry), 1071 (uint64_t)&ctxt.pkt); 1072 if (ret) 1073 goto out; 1074 1075 wait_for_completion(&comp_pkt.host_event); 1076 1077 if (comp_pkt.completion_status < 0) { 1078 device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n"); 1079 ret = EPROTO; 1080 } else { 1081 ret = 0; 1082 } 1083 1084 out: 1085 free_completion(&comp_pkt.host_event); 1086 return (ret); 1087 } 1088 1089 /* 1090 * It looks this is only needed by Windows VM, but let's send the message too 1091 * just to make the host happy. 1092 */ 1093 static int 1094 hv_send_resources_allocated(struct hv_pcibus *hbus) 1095 { 1096 struct pci_resources_assigned *res_assigned; 1097 struct hv_pci_compl comp_pkt; 1098 struct hv_pci_dev *hpdev; 1099 struct pci_packet *pkt; 1100 uint32_t wslot; 1101 int ret = 0; 1102 1103 pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned), 1104 M_DEVBUF, M_WAITOK | M_ZERO); 1105 1106 for (wslot = 0; wslot < 256; wslot++) { 1107 hpdev = get_pcichild_wslot(hbus, wslot); 1108 if (!hpdev) 1109 continue; 1110 1111 init_completion(&comp_pkt.host_event); 1112 1113 memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned)); 1114 pkt->completion_func = hv_pci_generic_compl; 1115 pkt->compl_ctxt = &comp_pkt; 1116 1117 res_assigned = (struct pci_resources_assigned *)&pkt->message; 1118 res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED; 1119 res_assigned->wslot.val = hpdev->desc.wslot.val; 1120 1121 ret = vmbus_chan_send(hbus->sc->chan, 1122 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 1123 
&pkt->message, sizeof(*res_assigned), (uint64_t)pkt); 1124 if (ret) { 1125 free_completion(&comp_pkt.host_event); 1126 break; 1127 } 1128 1129 wait_for_completion(&comp_pkt.host_event); 1130 free_completion(&comp_pkt.host_event); 1131 1132 if (comp_pkt.completion_status < 0) { 1133 ret = EPROTO; 1134 device_printf(hbus->pcib, 1135 "failed to send PCI_RESOURCES_ASSIGNED\n"); 1136 break; 1137 } 1138 } 1139 1140 free(pkt, M_DEVBUF); 1141 return (ret); 1142 } 1143 1144 static int 1145 hv_send_resources_released(struct hv_pcibus *hbus) 1146 { 1147 struct pci_child_message pkt; 1148 struct hv_pci_dev *hpdev; 1149 uint32_t wslot; 1150 int ret; 1151 1152 for (wslot = 0; wslot < 256; wslot++) { 1153 hpdev = get_pcichild_wslot(hbus, wslot); 1154 if (!hpdev) 1155 continue; 1156 1157 pkt.message_type.type = PCI_RESOURCES_RELEASED; 1158 pkt.wslot.val = hpdev->desc.wslot.val; 1159 1160 ret = vmbus_chan_send(hbus->sc->chan, 1161 VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0); 1162 if (ret) 1163 return (ret); 1164 } 1165 1166 return (0); 1167 } 1168 1169 #define hv_cfg_read(x, s) \ 1170 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \ 1171 bus_size_t offset) \ 1172 { \ 1173 return (bus_read_##s(bus->cfg_res, offset)); \ 1174 } 1175 1176 #define hv_cfg_write(x, s) \ 1177 static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \ 1178 bus_size_t offset, uint##x##_t val) \ 1179 { \ 1180 return (bus_write_##s(bus->cfg_res, offset, val)); \ 1181 } 1182 1183 hv_cfg_read(8, 1) 1184 hv_cfg_read(16, 2) 1185 hv_cfg_read(32, 4) 1186 1187 hv_cfg_write(8, 1) 1188 hv_cfg_write(16, 2) 1189 hv_cfg_write(32, 4) 1190 1191 static void 1192 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, 1193 uint32_t *val) 1194 { 1195 struct hv_pcibus *hbus = hpdev->hbus; 1196 bus_size_t addr = CFG_PAGE_OFFSET + where; 1197 1198 /* 1199 * If the attempt is to read the IDs or the ROM BAR, simulate that. 
1200 */ 1201 if (where + size <= PCIR_COMMAND) { 1202 memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size); 1203 } else if (where >= PCIR_REVID && where + size <= 1204 PCIR_CACHELNSZ) { 1205 memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where - 1206 PCIR_REVID, size); 1207 } else if (where >= PCIR_SUBVEND_0 && where + size <= 1208 PCIR_BIOS) { 1209 memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where - 1210 PCIR_SUBVEND_0, size); 1211 } else if (where >= PCIR_BIOS && where + size <= 1212 PCIR_CAP_PTR) { 1213 /* ROM BARs are unimplemented */ 1214 *val = 0; 1215 } else if ((where >= PCIR_INTLINE && where + size <= 1216 PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) { 1217 /* 1218 * Interrupt Line and Interrupt PIN are hard-wired to zero 1219 * because this front-end only supports message-signaled 1220 * interrupts. 1221 */ 1222 *val = 0; 1223 } else if (where + size <= CFG_PAGE_SIZE) { 1224 mtx_lock(&hbus->config_lock); 1225 1226 /* Choose the function to be read. */ 1227 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1228 1229 /* Make sure the function was chosen before we start reading.*/ 1230 mb(); 1231 1232 /* Read from that function's config space. */ 1233 switch (size) { 1234 case 1: 1235 *((uint8_t *)val) = hv_cfg_read_1(hbus, addr); 1236 break; 1237 case 2: 1238 *((uint16_t *)val) = hv_cfg_read_2(hbus, addr); 1239 break; 1240 default: 1241 *((uint32_t *)val) = hv_cfg_read_4(hbus, addr); 1242 break; 1243 } 1244 /* 1245 * Make sure the write was done before we release the lock, 1246 * allowing consecutive reads/writes. 1247 */ 1248 mb(); 1249 1250 mtx_unlock(&hbus->config_lock); 1251 } else { 1252 /* Invalid config read: it's unlikely to reach here. 
*/ 1253 memset(val, 0, size); 1254 } 1255 } 1256 1257 static void 1258 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, 1259 uint32_t val) 1260 { 1261 struct hv_pcibus *hbus = hpdev->hbus; 1262 bus_size_t addr = CFG_PAGE_OFFSET + where; 1263 1264 /* SSIDs and ROM BARs are read-only */ 1265 if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR) 1266 return; 1267 1268 if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) { 1269 mtx_lock(&hbus->config_lock); 1270 1271 /* Choose the function to be written. */ 1272 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1273 1274 /* Make sure the function was chosen before we start writing.*/ 1275 wmb(); 1276 1277 /* Write to that function's config space. */ 1278 switch (size) { 1279 case 1: 1280 hv_cfg_write_1(hbus, addr, (uint8_t)val); 1281 break; 1282 case 2: 1283 hv_cfg_write_2(hbus, addr, (uint16_t)val); 1284 break; 1285 default: 1286 hv_cfg_write_4(hbus, addr, (uint32_t)val); 1287 break; 1288 } 1289 1290 /* 1291 * Make sure the write was done before we release the lock, 1292 * allowing consecutive reads/writes. 1293 */ 1294 mb(); 1295 1296 mtx_unlock(&hbus->config_lock); 1297 } else { 1298 /* Invalid config write: it's unlikely to reach here. */ 1299 return; 1300 } 1301 } 1302 1303 static void 1304 vmbus_pcib_set_detaching(void *arg, int pending __unused) 1305 { 1306 struct hv_pcibus *hbus = arg; 1307 1308 atomic_set_int(&hbus->detaching, 1); 1309 } 1310 1311 static void 1312 vmbus_pcib_pre_detach(struct hv_pcibus *hbus) 1313 { 1314 struct task task; 1315 1316 TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus); 1317 1318 /* 1319 * Make sure the channel callback won't push any possible new 1320 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq. 1321 */ 1322 vmbus_chan_run_task(hbus->sc->chan, &task); 1323 1324 taskqueue_drain_all(hbus->sc->taskq); 1325 } 1326 1327 1328 /* 1329 * Standard probe entry point. 
1330 * 1331 */ 1332 static int 1333 vmbus_pcib_probe(device_t dev) 1334 { 1335 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1336 &g_pass_through_dev_type) == 0) { 1337 device_set_desc(dev, "Hyper-V PCI Express Pass Through"); 1338 return (BUS_PROBE_DEFAULT); 1339 } 1340 return (ENXIO); 1341 } 1342 1343 /* 1344 * Standard attach entry point. 1345 * 1346 */ 1347 static int 1348 vmbus_pcib_attach(device_t dev) 1349 { 1350 const int pci_ring_size = (4 * PAGE_SIZE); 1351 const struct hyperv_guid *inst_guid; 1352 struct vmbus_channel *channel; 1353 struct vmbus_pcib_softc *sc; 1354 struct hv_pcibus *hbus; 1355 int rid = 0; 1356 int ret; 1357 1358 hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO); 1359 hbus->pcib = dev; 1360 1361 channel = vmbus_get_channel(dev); 1362 inst_guid = vmbus_chan_guid_inst(channel); 1363 hbus->pci_domain = inst_guid->hv_guid[9] | 1364 (inst_guid->hv_guid[8] << 8); 1365 1366 mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF); 1367 mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF); 1368 TAILQ_INIT(&hbus->children); 1369 TAILQ_INIT(&hbus->dr_list); 1370 1371 hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 1372 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH, 1373 RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE)); 1374 1375 if (!hbus->cfg_res) { 1376 device_printf(dev, "failed to get resource for cfg window\n"); 1377 ret = ENXIO; 1378 goto free_bus; 1379 } 1380 1381 sc = device_get_softc(dev); 1382 sc->chan = channel; 1383 sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); 1384 sc->hbus = hbus; 1385 1386 /* 1387 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT 1388 * messages. NB: we can't handle the messages in the channel callback 1389 * directly, because the message handlers need to send new messages 1390 * to the host and waits for the host's completion messages, which 1391 * must also be handled by the channel callback. 
1392 */ 1393 sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK, 1394 taskqueue_thread_enqueue, &sc->taskq); 1395 taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq"); 1396 1397 hbus->sc = sc; 1398 1399 init_completion(&hbus->query_completion); 1400 hbus->query_comp = &hbus->query_completion; 1401 1402 ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size, 1403 NULL, 0, vmbus_pcib_on_channel_callback, sc); 1404 if (ret) 1405 goto free_res; 1406 1407 ret = hv_pci_protocol_negotiation(hbus); 1408 if (ret) 1409 goto vmbus_close; 1410 1411 ret = hv_pci_query_relations(hbus); 1412 if (ret) 1413 goto vmbus_close; 1414 wait_for_completion(hbus->query_comp); 1415 1416 ret = hv_pci_enter_d0(hbus); 1417 if (ret) 1418 goto vmbus_close; 1419 1420 ret = hv_send_resources_allocated(hbus); 1421 if (ret) 1422 goto vmbus_close; 1423 1424 hbus->pci_bus = device_add_child(dev, "pci", -1); 1425 if (!hbus->pci_bus) { 1426 device_printf(dev, "failed to create pci bus\n"); 1427 ret = ENXIO; 1428 goto vmbus_close; 1429 } 1430 1431 bus_generic_attach(dev); 1432 1433 hbus->state = hv_pcibus_installed; 1434 1435 return (0); 1436 1437 vmbus_close: 1438 vmbus_pcib_pre_detach(hbus); 1439 vmbus_chan_close(sc->chan); 1440 free_res: 1441 taskqueue_free(sc->taskq); 1442 free_completion(&hbus->query_completion); 1443 free(sc->rx_buf, M_DEVBUF); 1444 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1445 free_bus: 1446 mtx_destroy(&hbus->device_list_lock); 1447 mtx_destroy(&hbus->config_lock); 1448 free(hbus, M_DEVBUF); 1449 return (ret); 1450 } 1451 1452 /* 1453 * Standard detach entry point 1454 */ 1455 static int 1456 vmbus_pcib_detach(device_t dev) 1457 { 1458 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1459 struct hv_pcibus *hbus = sc->hbus; 1460 struct pci_message teardown_packet; 1461 struct pci_bus_relations relations; 1462 int ret; 1463 1464 vmbus_pcib_pre_detach(hbus); 1465 1466 if (hbus->state == hv_pcibus_installed) 1467 bus_generic_detach(dev); 
1468 1469 /* Delete any children which might still exist. */ 1470 memset(&relations, 0, sizeof(relations)); 1471 hv_pci_devices_present(hbus, &relations); 1472 1473 ret = hv_send_resources_released(hbus); 1474 if (ret) 1475 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n"); 1476 1477 teardown_packet.type = PCI_BUS_D0EXIT; 1478 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1479 &teardown_packet, sizeof(struct pci_message), 0); 1480 if (ret) 1481 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n"); 1482 1483 taskqueue_drain_all(hbus->sc->taskq); 1484 vmbus_chan_close(sc->chan); 1485 taskqueue_free(sc->taskq); 1486 1487 free_completion(&hbus->query_completion); 1488 free(sc->rx_buf, M_DEVBUF); 1489 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1490 1491 mtx_destroy(&hbus->device_list_lock); 1492 mtx_destroy(&hbus->config_lock); 1493 free(hbus, M_DEVBUF); 1494 1495 return (0); 1496 } 1497 1498 static int 1499 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val) 1500 { 1501 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1502 1503 switch (which) { 1504 case PCIB_IVAR_DOMAIN: 1505 *val = sc->hbus->pci_domain; 1506 return (0); 1507 1508 case PCIB_IVAR_BUS: 1509 /* There is only bus 0. 
*/ 1510 *val = 0; 1511 return (0); 1512 } 1513 return (ENOENT); 1514 } 1515 1516 static int 1517 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val) 1518 { 1519 return (ENOENT); 1520 } 1521 1522 static struct resource * 1523 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, 1524 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) 1525 { 1526 unsigned int bar_no; 1527 struct hv_pci_dev *hpdev; 1528 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1529 struct resource *res; 1530 unsigned int devfn; 1531 1532 if (type == PCI_RES_BUS) 1533 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid, 1534 start, end, count, flags)); 1535 1536 /* Devices with port I/O BAR are not supported. */ 1537 if (type == SYS_RES_IOPORT) 1538 return (NULL); 1539 1540 if (type == SYS_RES_MEMORY) { 1541 devfn = PCI_DEVFN(pci_get_slot(child), 1542 pci_get_function(child)); 1543 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1544 if (!hpdev) 1545 return (NULL); 1546 1547 bar_no = PCI_RID2BAR(*rid); 1548 if (bar_no >= MAX_NUM_BARS) 1549 return (NULL); 1550 1551 /* Make sure a 32-bit BAR gets a 32-bit address */ 1552 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64)) 1553 end = ulmin(end, 0xFFFFFFFF); 1554 } 1555 1556 res = bus_generic_alloc_resource(dev, child, type, rid, 1557 start, end, count, flags); 1558 /* 1559 * If this is a request for a specific range, assume it is 1560 * correct and pass it up to the parent. 
1561 */ 1562 if (res == NULL && start + count - 1 == end) 1563 res = bus_generic_alloc_resource(dev, child, type, rid, 1564 start, end, count, flags); 1565 return (res); 1566 } 1567 1568 static int 1569 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid, 1570 struct resource *r) 1571 { 1572 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1573 1574 if (type == PCI_RES_BUS) 1575 return (pci_domain_release_bus(sc->hbus->pci_domain, child, 1576 rid, r)); 1577 1578 if (type == SYS_RES_IOPORT) 1579 return (EINVAL); 1580 1581 return (bus_generic_release_resource(dev, child, type, rid, r)); 1582 } 1583 1584 #if __FreeBSD_version >= 1100000 1585 static int 1586 vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op, 1587 size_t setsize, cpuset_t *cpuset) 1588 { 1589 return (bus_get_cpus(pcib, op, setsize, cpuset)); 1590 } 1591 #endif 1592 1593 static uint32_t 1594 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, 1595 u_int reg, int bytes) 1596 { 1597 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1598 struct hv_pci_dev *hpdev; 1599 unsigned int devfn = PCI_DEVFN(slot, func); 1600 uint32_t data = 0; 1601 1602 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1603 1604 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1605 if (!hpdev) 1606 return (~0); 1607 1608 _hv_pcifront_read_config(hpdev, reg, bytes, &data); 1609 1610 return (data); 1611 } 1612 1613 static void 1614 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, 1615 u_int reg, uint32_t data, int bytes) 1616 { 1617 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1618 struct hv_pci_dev *hpdev; 1619 unsigned int devfn = PCI_DEVFN(slot, func); 1620 1621 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1622 1623 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1624 if (!hpdev) 1625 return; 1626 1627 _hv_pcifront_write_config(hpdev, reg, bytes, data); 1628 } 1629 1630 static int 1631 
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin) 1632 { 1633 /* We only support MSI/MSI-X and don't support INTx interrupt. */ 1634 return (PCI_INVALID_IRQ); 1635 } 1636 1637 static int 1638 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count, 1639 int maxcount, int *irqs) 1640 { 1641 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount, 1642 irqs)); 1643 } 1644 1645 static int 1646 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) 1647 { 1648 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs)); 1649 } 1650 1651 static int 1652 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) 1653 { 1654 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq)); 1655 } 1656 1657 static int 1658 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq) 1659 { 1660 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq)); 1661 } 1662 1663 #define MSI_INTEL_ADDR_DEST 0x000ff000 1664 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. 
*/ 1665 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED 1666 1667 static int 1668 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq, 1669 uint64_t *addr, uint32_t *data) 1670 { 1671 unsigned int devfn; 1672 struct hv_pci_dev *hpdev; 1673 1674 uint64_t v_addr; 1675 uint32_t v_data; 1676 struct hv_irq_desc *hid, *tmp_hid; 1677 unsigned int cpu, vcpu_id; 1678 unsigned int vector; 1679 1680 struct vmbus_pcib_softc *sc = device_get_softc(pcib); 1681 struct pci_create_interrupt *int_pkt; 1682 struct compose_comp_ctxt comp; 1683 struct { 1684 struct pci_packet pkt; 1685 uint8_t buffer[sizeof(struct pci_create_interrupt)]; 1686 } ctxt; 1687 1688 int ret; 1689 1690 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); 1691 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1692 if (!hpdev) 1693 return (ENOENT); 1694 1695 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq, 1696 &v_addr, &v_data); 1697 if (ret) 1698 return (ret); 1699 1700 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) { 1701 if (hid->irq == irq) { 1702 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link); 1703 hv_int_desc_free(hpdev, hid); 1704 break; 1705 } 1706 } 1707 1708 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12; 1709 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); 1710 vector = v_data & MSI_INTEL_DATA_INTVEC; 1711 1712 init_completion(&comp.comp_pkt.host_event); 1713 1714 memset(&ctxt, 0, sizeof(ctxt)); 1715 ctxt.pkt.completion_func = hv_pci_compose_compl; 1716 ctxt.pkt.compl_ctxt = ∁ 1717 1718 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message; 1719 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; 1720 int_pkt->wslot.val = hpdev->desc.wslot.val; 1721 int_pkt->int_desc.vector = vector; 1722 int_pkt->int_desc.vector_count = 1; 1723 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; 1724 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id; 1725 1726 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1727 
VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt), 1728 (uint64_t)&ctxt.pkt); 1729 if (ret) { 1730 free_completion(&comp.comp_pkt.host_event); 1731 return (ret); 1732 } 1733 1734 wait_for_completion(&comp.comp_pkt.host_event); 1735 free_completion(&comp.comp_pkt.host_event); 1736 1737 if (comp.comp_pkt.completion_status < 0) 1738 return (EPROTO); 1739 1740 *addr = comp.int_desc.address; 1741 *data = comp.int_desc.data; 1742 1743 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO); 1744 hid->irq = irq; 1745 hid->desc = comp.int_desc; 1746 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link); 1747 1748 return (0); 1749 } 1750 1751 static device_method_t vmbus_pcib_methods[] = { 1752 /* Device interface */ 1753 DEVMETHOD(device_probe, vmbus_pcib_probe), 1754 DEVMETHOD(device_attach, vmbus_pcib_attach), 1755 DEVMETHOD(device_detach, vmbus_pcib_detach), 1756 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1757 DEVMETHOD(device_suspend, bus_generic_suspend), 1758 DEVMETHOD(device_resume, bus_generic_resume), 1759 1760 /* Bus interface */ 1761 DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar), 1762 DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar), 1763 DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource), 1764 DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource), 1765 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), 1766 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), 1767 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), 1768 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), 1769 #if __FreeBSD_version >= 1100000 1770 DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus), 1771 #endif 1772 1773 /* pcib interface */ 1774 DEVMETHOD(pcib_maxslots, pcib_maxslots), 1775 DEVMETHOD(pcib_read_config, vmbus_pcib_read_config), 1776 DEVMETHOD(pcib_write_config, vmbus_pcib_write_config), 1777 DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr), 1778 DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi), 1779 
DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi), 1780 DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix), 1781 DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix), 1782 DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi), 1783 DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), 1784 1785 DEVMETHOD_END 1786 }; 1787 1788 static devclass_t pcib_devclass; 1789 1790 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods, 1791 sizeof(struct vmbus_pcib_softc)); 1792 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0); 1793 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1); 1794 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1); 1795 1796 #endif /* NEW_PCIB */ 1797