1 /*- 2 * Copyright (c) 2016-2017 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #ifdef NEW_PCIB 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/types.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/kernel.h> 38 #include <sys/queue.h> 39 #include <sys/lock.h> 40 #include <sys/sx.h> 41 #include <sys/smp.h> 42 #include <sys/sysctl.h> 43 #include <sys/bus.h> 44 #include <sys/rman.h> 45 #include <sys/mutex.h> 46 #include <sys/errno.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_param.h> 50 #include <vm/vm_kern.h> 51 #include <vm/pmap.h> 52 53 #include <machine/atomic.h> 54 #include <machine/bus.h> 55 #include <machine/frame.h> 56 #include <machine/pci_cfgreg.h> 57 #include <machine/resource.h> 58 59 #include <sys/pciio.h> 60 #include <dev/pci/pcireg.h> 61 #include <dev/pci/pcivar.h> 62 #include <dev/pci/pci_private.h> 63 #include <dev/pci/pcib_private.h> 64 #include "pcib_if.h" 65 66 #include <machine/intr_machdep.h> 67 #include <x86/apicreg.h> 68 69 #include <dev/hyperv/include/hyperv.h> 70 #include <dev/hyperv/include/hyperv_busdma.h> 71 #include <dev/hyperv/include/vmbus_xact.h> 72 #include <dev/hyperv/vmbus/vmbus_reg.h> 73 #include <dev/hyperv/vmbus/vmbus_chanvar.h> 74 75 #include "vmbus_if.h" 76 77 #if __FreeBSD_version < 1100000 78 typedef u_long rman_res_t; 79 #define RM_MAX_END (~(rman_res_t)0) 80 #endif 81 82 struct completion { 83 unsigned int done; 84 struct mtx lock; 85 }; 86 87 static void 88 init_completion(struct completion *c) 89 { 90 memset(c, 0, sizeof(*c)); 91 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF); 92 c->done = 0; 93 } 94 95 static void 96 free_completion(struct completion *c) 97 { 98 mtx_destroy(&c->lock); 99 } 100 101 static void 102 complete(struct completion *c) 103 { 104 mtx_lock(&c->lock); 105 c->done++; 106 mtx_unlock(&c->lock); 107 wakeup(c); 108 } 109 110 static void 111 wait_for_completion(struct completion *c) 112 { 113 mtx_lock(&c->lock); 114 while (c->done == 0) 115 mtx_sleep(c, &c->lock, 0, 
"hvwfc", 0); 116 c->done--; 117 mtx_unlock(&c->lock); 118 } 119 120 #define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (major))) 121 122 enum { 123 PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), 124 PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 125 }; 126 127 #define PCI_CONFIG_MMIO_LENGTH 0x2000 128 #define CFG_PAGE_OFFSET 0x1000 129 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) 130 131 /* 132 * Message Types 133 */ 134 135 enum pci_message_type { 136 /* 137 * Version 1.1 138 */ 139 PCI_MESSAGE_BASE = 0x42490000, 140 PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0, 141 PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1, 142 PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4, 143 PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5, 144 PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6, 145 PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7, 146 PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8, 147 PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9, 148 PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA, 149 PCI_EJECT = PCI_MESSAGE_BASE + 0xB, 150 PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC, 151 PCI_REENABLE = PCI_MESSAGE_BASE + 0xD, 152 PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE, 153 PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF, 154 PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10, 155 PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11, 156 PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12, 157 PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13, 158 PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14, 159 PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15, 160 PCI_MESSAGE_MAXIMUM 161 }; 162 163 /* 164 * Structures defining the virtual PCI Express protocol. 
*/

/* Protocol version, accessible either as a 32-bit value or as two halves. */
union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;

/*
 * This representation is the one used in Windows, which is
 * what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 */
union win_slot_encoding {
	struct {
		uint32_t	slot:5;
		uint32_t	func:3;
		uint32_t	reserved:24;
	} bits;
	uint32_t val;
} __packed;

/* Description of one PCI function as reported in a bus-relations message. */
struct pci_func_desc {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
} __packed;

/* Interrupt description carried in a PCI_CREATE_INTERRUPT_MESSAGE request. */
struct hv_msi_desc {
	uint8_t		vector;
	uint8_t		delivery_mode;
	uint16_t	vector_count;
	uint32_t	reserved;
	uint64_t	cpu_mask;
} __packed;

/*
 * Interrupt description returned in the create-interrupt response and
 * sent back in a PCI_DELETE_INTERRUPT_MESSAGE request.
 */
struct tran_int_desc {
	uint16_t	reserved;
	uint16_t	vector_count;
	uint32_t	data;
	uint64_t	address;
} __packed;

/* Common prefix of every outgoing message: one of enum pci_message_type. */
struct pci_message {
	uint32_t type;
} __packed;

/* Message addressed to a single child function, identified by wslot. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

/* Header of an in-band message received from the channel. */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

/* Header of a completion packet received from the channel. */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * Per-request bookkeeping.  The address of this structure is passed as the
 * transaction id when a request is sent; the channel callback casts the
 * transaction id of a completion packet back to a pci_packet and calls
 * completion_func(compl_ctxt, response, size).  The message itself
 * follows immediately in memory.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};

/*
 * Specific message types supporting the PCI protocol.
*/

/* PCI_QUERY_PROTOCOL_VERSION request. */
struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t is_last_attempt:1;
	uint32_t reservedz:31;
} __packed;

/* PCI_BUS_D0ENTRY request; mmio_base is the start of the config MMIO window. */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;
} __packed;

/* PCI_BUS_RELATIONS message: device_count descriptors follow in func[]. */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
/* Response to PCI_QUERY_RESOURCE_REQUIREMENTS: one probed value per BAR. */
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status; /* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

/* PCI_RESOURCES_ASSIGNED request for the function identified by wslot. */
struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

/* PCI_CREATE_INTERRUPT_MESSAGE request. */
struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

/* Response to a create-interrupt request. */
struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

/* PCI_DELETE_INTERRUPT_MESSAGE request. */
struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

/* Incoming message that refers to one child function (e.g. PCI_EJECT). */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

/* PCI_EJECTION_COMPLETE message sent after a child has been removed. */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;

/*
 * Driver specific state.
*/

enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-bridge state shared by the channel callback and the worker tasks. */
struct hv_pcibus {
	device_t pcib;
	device_t pci_bus;
	struct vmbus_pcib_softc *sc;

	/* Taken from the low 16 bits of the first child's serial number. */
	uint16_t pci_domain;

	enum hv_pcibus_state state;

	/* MMIO window used for config-space access. */
	struct resource *cfg_res;

	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock;    /* Protect lists below */
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	/* Set to 1 by vmbus_pcib_set_detaching(). */
	volatile int detaching;
};

/* One child PCI function reported by the host. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct pci_func_desc desc;

	/* Scratch flag used while reconciling a bus-relations message. */
	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be
 * processed in order.
362 */ 363 struct hv_dr_work { 364 struct task task; 365 struct hv_pcibus *bus; 366 }; 367 368 struct hv_dr_state { 369 TAILQ_ENTRY(hv_dr_state) link; 370 uint32_t device_count; 371 struct pci_func_desc func[0]; 372 }; 373 374 struct hv_irq_desc { 375 TAILQ_ENTRY(hv_irq_desc) link; 376 struct tran_int_desc desc; 377 int irq; 378 }; 379 380 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) 381 #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) 382 #define PCI_FUNC(devfn) ((devfn) & 0x07) 383 384 static uint32_t 385 devfn_to_wslot(unsigned int devfn) 386 { 387 union win_slot_encoding wslot; 388 389 wslot.val = 0; 390 wslot.bits.slot = PCI_SLOT(devfn); 391 wslot.bits.func = PCI_FUNC(devfn); 392 393 return (wslot.val); 394 } 395 396 static unsigned int 397 wslot_to_devfn(uint32_t wslot) 398 { 399 union win_slot_encoding encoding; 400 unsigned int slot; 401 unsigned int func; 402 403 encoding.val = wslot; 404 405 slot = encoding.bits.slot; 406 func = encoding.bits.func; 407 408 return (PCI_DEVFN(slot, func)); 409 } 410 411 struct vmbus_pcib_softc { 412 struct vmbus_channel *chan; 413 void *rx_buf; 414 415 struct taskqueue *taskq; 416 417 struct hv_pcibus *hbus; 418 }; 419 420 /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */ 421 static const struct hyperv_guid g_pass_through_dev_type = { 422 .hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, 423 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F} 424 }; 425 426 struct hv_pci_compl { 427 struct completion host_event; 428 int32_t completion_status; 429 }; 430 431 struct q_res_req_compl { 432 struct completion host_event; 433 struct hv_pci_dev *hpdev; 434 }; 435 436 struct compose_comp_ctxt { 437 struct hv_pci_compl comp_pkt; 438 struct tran_int_desc int_desc; 439 }; 440 441 static void 442 hv_pci_generic_compl(void *context, struct pci_response *resp, 443 int resp_packet_size) 444 { 445 struct hv_pci_compl *comp_pkt = context; 446 447 if (resp_packet_size >= sizeof(struct pci_response)) 448 
comp_pkt->completion_status = resp->status; 449 else 450 comp_pkt->completion_status = -1; 451 452 complete(&comp_pkt->host_event); 453 } 454 455 static void 456 q_resource_requirements(void *context, struct pci_response *resp, 457 int resp_packet_size) 458 { 459 struct q_res_req_compl *completion = context; 460 struct pci_q_res_req_response *q_res_req = 461 (struct pci_q_res_req_response *)resp; 462 int i; 463 464 if (resp->status < 0) { 465 printf("vmbus_pcib: failed to query resource requirements\n"); 466 } else { 467 for (i = 0; i < MAX_NUM_BARS; i++) 468 completion->hpdev->probed_bar[i] = 469 q_res_req->probed_bar[i]; 470 } 471 472 complete(&completion->host_event); 473 } 474 475 static void 476 hv_pci_compose_compl(void *context, struct pci_response *resp, 477 int resp_packet_size) 478 { 479 struct compose_comp_ctxt *comp_pkt = context; 480 struct pci_create_int_response *int_resp = 481 (struct pci_create_int_response *)resp; 482 483 comp_pkt->comp_pkt.completion_status = resp->status; 484 comp_pkt->int_desc = int_resp->int_desc; 485 complete(&comp_pkt->comp_pkt.host_event); 486 } 487 488 static void 489 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid) 490 { 491 struct pci_delete_interrupt *int_pkt; 492 struct { 493 struct pci_packet pkt; 494 uint8_t buffer[sizeof(struct pci_delete_interrupt)]; 495 } ctxt; 496 497 memset(&ctxt, 0, sizeof(ctxt)); 498 int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; 499 int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE; 500 int_pkt->wslot.val = hpdev->desc.wslot.val; 501 int_pkt->int_desc = hid->desc; 502 503 vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 504 int_pkt, sizeof(*int_pkt), 0); 505 506 free(hid, M_DEVBUF); 507 } 508 509 static void 510 hv_pci_delete_device(struct hv_pci_dev *hpdev) 511 { 512 struct hv_pcibus *hbus = hpdev->hbus; 513 struct hv_irq_desc *hid, *tmp_hid; 514 device_t pci_dev; 515 int devfn; 516 517 devfn = wslot_to_devfn(hpdev->desc.wslot.val); 
518 519 mtx_lock(&Giant); 520 521 pci_dev = pci_find_dbsf(hbus->pci_domain, 522 0, PCI_SLOT(devfn), PCI_FUNC(devfn)); 523 if (pci_dev) 524 device_delete_child(hbus->pci_bus, pci_dev); 525 526 mtx_unlock(&Giant); 527 528 mtx_lock(&hbus->device_list_lock); 529 TAILQ_REMOVE(&hbus->children, hpdev, link); 530 mtx_unlock(&hbus->device_list_lock); 531 532 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) 533 hv_int_desc_free(hpdev, hid); 534 535 free(hpdev, M_DEVBUF); 536 } 537 538 static struct hv_pci_dev * 539 new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc) 540 { 541 struct hv_pci_dev *hpdev; 542 struct pci_child_message *res_req; 543 struct q_res_req_compl comp_pkt; 544 struct { 545 struct pci_packet pkt; 546 uint8_t buffer[sizeof(struct pci_child_message)]; 547 } ctxt; 548 int ret; 549 550 hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO); 551 hpdev->hbus = hbus; 552 553 TAILQ_INIT(&hpdev->irq_desc_list); 554 555 init_completion(&comp_pkt.host_event); 556 comp_pkt.hpdev = hpdev; 557 558 ctxt.pkt.compl_ctxt = &comp_pkt; 559 ctxt.pkt.completion_func = q_resource_requirements; 560 561 res_req = (struct pci_child_message *)&ctxt.pkt.message; 562 res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS; 563 res_req->wslot.val = desc->wslot.val; 564 565 ret = vmbus_chan_send(hbus->sc->chan, 566 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 567 res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt); 568 if (ret) 569 goto err; 570 571 wait_for_completion(&comp_pkt.host_event); 572 free_completion(&comp_pkt.host_event); 573 574 hpdev->desc = *desc; 575 576 mtx_lock(&hbus->device_list_lock); 577 if (TAILQ_EMPTY(&hbus->children)) 578 hbus->pci_domain = desc->ser & 0xFFFF; 579 TAILQ_INSERT_TAIL(&hbus->children, hpdev, link); 580 mtx_unlock(&hbus->device_list_lock); 581 return (hpdev); 582 err: 583 free_completion(&comp_pkt.host_event); 584 free(hpdev, M_DEVBUF); 585 return (NULL); 586 } 587 588 #if __FreeBSD_version < 
1100000 589 590 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */ 591 592 static struct pci_devinfo * 593 pci_identify_function(device_t pcib, device_t dev, int domain, int busno, 594 int slot, int func, size_t dinfo_size) 595 { 596 struct pci_devinfo *dinfo; 597 598 dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size); 599 if (dinfo != NULL) 600 pci_add_child(dev, dinfo); 601 602 return (dinfo); 603 } 604 605 static int 606 pci_rescan(device_t dev) 607 { 608 #define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w) 609 device_t pcib = device_get_parent(dev); 610 struct pci_softc *sc; 611 device_t child, *devlist, *unchanged; 612 int devcount, error, i, j, maxslots, oldcount; 613 int busno, domain, s, f, pcifunchigh; 614 uint8_t hdrtype; 615 616 /* No need to check for ARI on a rescan. */ 617 error = device_get_children(dev, &devlist, &devcount); 618 if (error) 619 return (error); 620 if (devcount != 0) { 621 unchanged = malloc(devcount * sizeof(device_t), M_TEMP, 622 M_NOWAIT | M_ZERO); 623 if (unchanged == NULL) { 624 free(devlist, M_TEMP); 625 return (ENOMEM); 626 } 627 } else 628 unchanged = NULL; 629 630 sc = device_get_softc(dev); 631 domain = pcib_get_domain(dev); 632 busno = pcib_get_bus(dev); 633 maxslots = PCIB_MAXSLOTS(pcib); 634 for (s = 0; s <= maxslots; s++) { 635 /* If function 0 is not present, skip to the next slot. */ 636 f = 0; 637 if (REG(PCIR_VENDOR, 2) == 0xffff) 638 continue; 639 pcifunchigh = 0; 640 hdrtype = REG(PCIR_HDRTYPE, 1); 641 if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) 642 continue; 643 if (hdrtype & PCIM_MFDEV) 644 pcifunchigh = PCIB_MAXFUNCS(pcib); 645 for (f = 0; f <= pcifunchigh; f++) { 646 if (REG(PCIR_VENDOR, 2) == 0xffff) 647 continue; 648 649 /* 650 * Found a valid function. Check if a 651 * device_t for this device already exists. 
*/
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			pci_identify_function(pcib, dev, domain, busno, s, f,
			    sizeof(struct pci_devinfo));
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}

#else

/* Newer kernels provide BUS_RESCAN(); just forward to it. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}

#endif

/*
 * Task handler that reconciles the child list with the most recent
 * bus-relations message queued on hbus->dr_list.  "arg" is the
 * hv_dr_work allocated by hv_pci_devices_present(); it is freed here.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct pci_func_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	/* The queue was already empty: nothing to process. */
	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		/* A child matches on slot, vendor, device and serial number. */
		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/*
	 * Remove missing device(s), if any.
	 * NOTE(review): this walk of hbus->children is done without
	 * device_list_lock; hv_pci_delete_device() takes the lock itself
	 * for the removal -- confirm nothing else mutates the list here.
	 */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}

/*
 * Look up a child by its Windows slot encoding.  Returns NULL when no
 * child on the bus has that wslot value.
 */
static struct hv_pci_dev *
get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
{
	struct hv_pci_dev *hpdev, *ret = NULL;

	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link) {
		if (hpdev->desc.wslot.val == wslot) {
			ret = hpdev;
			break;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	return (ret);
}

/*
 * Queue a copy of a bus-relations message on hbus->dr_list and schedule
 * pci_devices_present_work() on the softc's taskqueue to process it.
 * Non-empty messages are ignored once the bus is detaching.
 */
static void
hv_pci_devices_present(struct hv_pcibus *hbus,
    struct pci_bus_relations *relations)
{
	struct hv_dr_state *dr;
	struct hv_dr_work *dr_wrk;
	unsigned long dr_size;

	if (hbus->detaching && relations->device_count > 0)
		return;

	dr_size = offsetof(struct hv_dr_state, func) +
	    (sizeof(struct pci_func_desc) * relations->device_count);
	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);

	dr->device_count = relations->device_count;
	if (dr->device_count != 0)
		memcpy(dr->func, relations->func,
		    sizeof(struct pci_func_desc) * dr->device_count);

	mtx_lock(&hbus->device_list_lock);
	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
	mtx_unlock(&hbus->device_list_lock);

	/* The work item is freed by pci_devices_present_work(). */
	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
	dr_wrk->bus = hbus;
	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
}

/*
 * Task handler for an ejection request: delete the child and then tell
 * the host that the ejection is complete.
 */
static void
hv_eject_device_work(void *arg, int pending __unused)
{
	struct hv_pci_dev *hpdev = arg;
	/* Saved up front: hpdev is freed by hv_pci_delete_device() below. */
	union win_slot_encoding wslot = hpdev->desc.wslot;
	struct hv_pcibus *hbus = hpdev->hbus;
	struct pci_eject_response *eject_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hv_pci_delete_device(hpdev);

	memset(&ctxt, 0, sizeof(ctxt));
	eject_pkt = (struct
pci_eject_response *)&ctxt.pkt.message;
	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	eject_pkt->wslot.val = wslot.val;
	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    eject_pkt, sizeof(*eject_pkt), 0);
}

/*
 * Schedule hv_eject_device_work() for a child the host asked to eject.
 * Nothing is scheduled once the bus is detaching.
 */
static void
hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct taskqueue *taskq;

	if (hbus->detaching)
		return;

	/*
	 * Push this task into the same taskqueue on which
	 * vmbus_pcib_attach() runs, so we're sure this task can't run
	 * concurrently with vmbus_pcib_attach().
	 */
	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
	taskqueue_enqueue(taskq, &hpdev->eject_task);
}

/* Initial size of the receive buffer (sc->rx_buf) used by the callback. */
#define PCIB_PACKET_SIZE	0x100

/*
 * Channel callback: drain all pending packets.  Completion packets are
 * dispatched to the completion_func of the pci_packet named by their
 * transaction id; in-band PCI_BUS_RELATIONS and PCI_EJECT messages are
 * handed to the corresponding handlers.
 */
static void
vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
{
	struct vmbus_pcib_softc *sc = arg;
	struct hv_pcibus *hbus = sc->hbus;

	void *buffer;
	int bufferlen = PCIB_PACKET_SIZE;

	struct pci_packet *comp_packet;
	struct pci_response *response;
	struct pci_incoming_message *new_msg;
	struct pci_bus_relations *bus_rel;
	struct pci_dev_incoming *dev_msg;
	struct hv_pci_dev *hpdev;

	buffer = sc->rx_buf;
	do {
		struct vmbus_chanpkt_hdr *pkt = buffer;
		uint32_t bytes_rxed;
		int ret;

		bytes_rxed = bufferlen;
		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);

		if (ret == ENOBUFS) {
			/* Handle large packet */
			/*
			 * Only a buffer allocated here (never sc->rx_buf)
			 * can be larger than PCIB_PACKET_SIZE, so only
			 * that one is freed.
			 */
			if (bufferlen > PCIB_PACKET_SIZE) {
				free(buffer, M_DEVBUF);
				buffer = NULL;
			}

			/* alloc new buffer */
			buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
			bufferlen = bytes_rxed;

			/* Retry the receive with the larger buffer. */
			continue;
		}

		if (ret != 0) {
			/* ignore EIO or EAGAIN */
			break;
		}

		/* Skip packets too short to carry more than a response header. */
		if (bytes_rxed <= sizeof(struct pci_response))
			continue;

		switch (pkt->cph_type) {
		case
VMBUS_CHANPKT_TYPE_COMP: 940 comp_packet = 941 (struct pci_packet *)(uintptr_t)pkt->cph_xactid; 942 response = (struct pci_response *)pkt; 943 comp_packet->completion_func(comp_packet->compl_ctxt, 944 response, bytes_rxed); 945 break; 946 case VMBUS_CHANPKT_TYPE_INBAND: 947 new_msg = (struct pci_incoming_message *)buffer; 948 949 switch (new_msg->message_type.type) { 950 case PCI_BUS_RELATIONS: 951 bus_rel = (struct pci_bus_relations *)buffer; 952 953 if (bus_rel->device_count == 0) 954 break; 955 956 if (bytes_rxed < 957 offsetof(struct pci_bus_relations, func) + 958 (sizeof(struct pci_func_desc) * 959 (bus_rel->device_count))) 960 break; 961 962 hv_pci_devices_present(hbus, bus_rel); 963 break; 964 965 case PCI_EJECT: 966 dev_msg = (struct pci_dev_incoming *)buffer; 967 hpdev = get_pcichild_wslot(hbus, 968 dev_msg->wslot.val); 969 970 if (hpdev) 971 hv_pci_eject_device(hpdev); 972 973 break; 974 default: 975 printf("vmbus_pcib: Unknown msg type 0x%x\n", 976 new_msg->message_type.type); 977 break; 978 } 979 break; 980 default: 981 printf("vmbus_pcib: Unknown VMBus msg type %hd\n", 982 pkt->cph_type); 983 break; 984 } 985 } while (1); 986 987 if (bufferlen > PCIB_PACKET_SIZE) 988 free(buffer, M_DEVBUF); 989 } 990 991 static int 992 hv_pci_protocol_negotiation(struct hv_pcibus *hbus) 993 { 994 struct pci_version_request *version_req; 995 struct hv_pci_compl comp_pkt; 996 struct { 997 struct pci_packet pkt; 998 uint8_t buffer[sizeof(struct pci_version_request)]; 999 } ctxt; 1000 int ret; 1001 1002 init_completion(&comp_pkt.host_event); 1003 1004 ctxt.pkt.completion_func = hv_pci_generic_compl; 1005 ctxt.pkt.compl_ctxt = &comp_pkt; 1006 version_req = (struct pci_version_request *)&ctxt.pkt.message; 1007 version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; 1008 version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT; 1009 version_req->is_last_attempt = 1; 1010 1011 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1012 VMBUS_CHANPKT_FLAG_RC, 
version_req, sizeof(*version_req),
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto out;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib,
		    "vmbus_pcib version negotiation failed: %x\n",
		    comp_pkt.completion_status);
		ret = EPROTO;
	} else {
		ret = 0;
	}
out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/* Ask the host to send along the list of child devices */
static int
hv_pci_query_relations(struct hv_pcibus *hbus)
{
	struct pci_message message;
	int ret;

	/* No completion is requested for this message. */
	message.type = PCI_QUERY_BUS_RELATIONS;
	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    &message, sizeof(message), 0);
	return (ret);
}

/*
 * Send PCI_BUS_D0ENTRY and wait for the response.  Returns 0 on success,
 * the vmbus_chan_send() error if the request could not be sent, or EPROTO
 * if the host reported a failure status.
 */
static int
hv_pci_enter_d0(struct hv_pcibus *hbus)
{
	struct pci_bus_d0_entry *d0_entry;
	struct hv_pci_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
	} ctxt;
	int ret;

	/*
	 * Tell the host that the bus is ready to use, and moved into the
	 * powered-on state.  This includes telling the host which region
	 * of memory-mapped I/O space has been chosen for configuration space
	 * access.
	 */
	init_completion(&comp_pkt.host_event);

	ctxt.pkt.completion_func = hv_pci_generic_compl;
	ctxt.pkt.compl_ctxt = &comp_pkt;

	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
	memset(d0_entry, 0, sizeof(*d0_entry));
	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);

	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto out;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
		ret = EPROTO;
	} else {
		ret = 0;
	}

out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/*
 * It looks like this is only needed by Windows VMs, but let's send the
 * message too just to make the host happy.
1095 */ 1096 static int 1097 hv_send_resources_allocated(struct hv_pcibus *hbus) 1098 { 1099 struct pci_resources_assigned *res_assigned; 1100 struct hv_pci_compl comp_pkt; 1101 struct hv_pci_dev *hpdev; 1102 struct pci_packet *pkt; 1103 uint32_t wslot; 1104 int ret = 0; 1105 1106 pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned), 1107 M_DEVBUF, M_WAITOK | M_ZERO); 1108 1109 for (wslot = 0; wslot < 256; wslot++) { 1110 hpdev = get_pcichild_wslot(hbus, wslot); 1111 if (!hpdev) 1112 continue; 1113 1114 init_completion(&comp_pkt.host_event); 1115 1116 memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned)); 1117 pkt->completion_func = hv_pci_generic_compl; 1118 pkt->compl_ctxt = &comp_pkt; 1119 1120 res_assigned = (struct pci_resources_assigned *)&pkt->message; 1121 res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED; 1122 res_assigned->wslot.val = hpdev->desc.wslot.val; 1123 1124 ret = vmbus_chan_send(hbus->sc->chan, 1125 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 1126 &pkt->message, sizeof(*res_assigned), 1127 (uint64_t)(uintptr_t)pkt); 1128 if (ret) { 1129 free_completion(&comp_pkt.host_event); 1130 break; 1131 } 1132 1133 wait_for_completion(&comp_pkt.host_event); 1134 free_completion(&comp_pkt.host_event); 1135 1136 if (comp_pkt.completion_status < 0) { 1137 ret = EPROTO; 1138 device_printf(hbus->pcib, 1139 "failed to send PCI_RESOURCES_ASSIGNED\n"); 1140 break; 1141 } 1142 } 1143 1144 free(pkt, M_DEVBUF); 1145 return (ret); 1146 } 1147 1148 static int 1149 hv_send_resources_released(struct hv_pcibus *hbus) 1150 { 1151 struct pci_child_message pkt; 1152 struct hv_pci_dev *hpdev; 1153 uint32_t wslot; 1154 int ret; 1155 1156 for (wslot = 0; wslot < 256; wslot++) { 1157 hpdev = get_pcichild_wslot(hbus, wslot); 1158 if (!hpdev) 1159 continue; 1160 1161 pkt.message_type.type = PCI_RESOURCES_RELEASED; 1162 pkt.wslot.val = hpdev->desc.wslot.val; 1163 1164 ret = vmbus_chan_send(hbus->sc->chan, 1165 VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0); 1166 if 
(ret) 1167 return (ret); 1168 } 1169 1170 return (0); 1171 } 1172 1173 #define hv_cfg_read(x, s) \ 1174 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \ 1175 bus_size_t offset) \ 1176 { \ 1177 return (bus_read_##s(bus->cfg_res, offset)); \ 1178 } 1179 1180 #define hv_cfg_write(x, s) \ 1181 static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \ 1182 bus_size_t offset, uint##x##_t val) \ 1183 { \ 1184 return (bus_write_##s(bus->cfg_res, offset, val)); \ 1185 } 1186 1187 hv_cfg_read(8, 1) 1188 hv_cfg_read(16, 2) 1189 hv_cfg_read(32, 4) 1190 1191 hv_cfg_write(8, 1) 1192 hv_cfg_write(16, 2) 1193 hv_cfg_write(32, 4) 1194 1195 static void 1196 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, 1197 uint32_t *val) 1198 { 1199 struct hv_pcibus *hbus = hpdev->hbus; 1200 bus_size_t addr = CFG_PAGE_OFFSET + where; 1201 1202 /* 1203 * If the attempt is to read the IDs or the ROM BAR, simulate that. 1204 */ 1205 if (where + size <= PCIR_COMMAND) { 1206 memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size); 1207 } else if (where >= PCIR_REVID && where + size <= 1208 PCIR_CACHELNSZ) { 1209 memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where - 1210 PCIR_REVID, size); 1211 } else if (where >= PCIR_SUBVEND_0 && where + size <= 1212 PCIR_BIOS) { 1213 memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where - 1214 PCIR_SUBVEND_0, size); 1215 } else if (where >= PCIR_BIOS && where + size <= 1216 PCIR_CAP_PTR) { 1217 /* ROM BARs are unimplemented */ 1218 *val = 0; 1219 } else if ((where >= PCIR_INTLINE && where + size <= 1220 PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) { 1221 /* 1222 * Interrupt Line and Interrupt PIN are hard-wired to zero 1223 * because this front-end only supports message-signaled 1224 * interrupts. 1225 */ 1226 *val = 0; 1227 } else if (where + size <= CFG_PAGE_SIZE) { 1228 mtx_lock(&hbus->config_lock); 1229 1230 /* Choose the function to be read. 
*/ 1231 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1232 1233 /* Make sure the function was chosen before we start reading.*/ 1234 mb(); 1235 1236 /* Read from that function's config space. */ 1237 switch (size) { 1238 case 1: 1239 *((uint8_t *)val) = hv_cfg_read_1(hbus, addr); 1240 break; 1241 case 2: 1242 *((uint16_t *)val) = hv_cfg_read_2(hbus, addr); 1243 break; 1244 default: 1245 *((uint32_t *)val) = hv_cfg_read_4(hbus, addr); 1246 break; 1247 } 1248 /* 1249 * Make sure the write was done before we release the lock, 1250 * allowing consecutive reads/writes. 1251 */ 1252 mb(); 1253 1254 mtx_unlock(&hbus->config_lock); 1255 } else { 1256 /* Invalid config read: it's unlikely to reach here. */ 1257 memset(val, 0, size); 1258 } 1259 } 1260 1261 static void 1262 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, 1263 uint32_t val) 1264 { 1265 struct hv_pcibus *hbus = hpdev->hbus; 1266 bus_size_t addr = CFG_PAGE_OFFSET + where; 1267 1268 /* SSIDs and ROM BARs are read-only */ 1269 if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR) 1270 return; 1271 1272 if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) { 1273 mtx_lock(&hbus->config_lock); 1274 1275 /* Choose the function to be written. */ 1276 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1277 1278 /* Make sure the function was chosen before we start writing.*/ 1279 wmb(); 1280 1281 /* Write to that function's config space. */ 1282 switch (size) { 1283 case 1: 1284 hv_cfg_write_1(hbus, addr, (uint8_t)val); 1285 break; 1286 case 2: 1287 hv_cfg_write_2(hbus, addr, (uint16_t)val); 1288 break; 1289 default: 1290 hv_cfg_write_4(hbus, addr, (uint32_t)val); 1291 break; 1292 } 1293 1294 /* 1295 * Make sure the write was done before we release the lock, 1296 * allowing consecutive reads/writes. 1297 */ 1298 mb(); 1299 1300 mtx_unlock(&hbus->config_lock); 1301 } else { 1302 /* Invalid config write: it's unlikely to reach here. 
*/ 1303 return; 1304 } 1305 } 1306 1307 static void 1308 vmbus_pcib_set_detaching(void *arg, int pending __unused) 1309 { 1310 struct hv_pcibus *hbus = arg; 1311 1312 atomic_set_int(&hbus->detaching, 1); 1313 } 1314 1315 static void 1316 vmbus_pcib_pre_detach(struct hv_pcibus *hbus) 1317 { 1318 struct task task; 1319 1320 TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus); 1321 1322 /* 1323 * Make sure the channel callback won't push any possible new 1324 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq. 1325 */ 1326 vmbus_chan_run_task(hbus->sc->chan, &task); 1327 1328 taskqueue_drain_all(hbus->sc->taskq); 1329 } 1330 1331 1332 /* 1333 * Standard probe entry point. 1334 * 1335 */ 1336 static int 1337 vmbus_pcib_probe(device_t dev) 1338 { 1339 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1340 &g_pass_through_dev_type) == 0) { 1341 device_set_desc(dev, "Hyper-V PCI Express Pass Through"); 1342 return (BUS_PROBE_DEFAULT); 1343 } 1344 return (ENXIO); 1345 } 1346 1347 /* 1348 * Standard attach entry point. 
1349 * 1350 */ 1351 static int 1352 vmbus_pcib_attach(device_t dev) 1353 { 1354 const int pci_ring_size = (4 * PAGE_SIZE); 1355 const struct hyperv_guid *inst_guid; 1356 struct vmbus_channel *channel; 1357 struct vmbus_pcib_softc *sc; 1358 struct hv_pcibus *hbus; 1359 int rid = 0; 1360 int ret; 1361 1362 hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO); 1363 hbus->pcib = dev; 1364 1365 channel = vmbus_get_channel(dev); 1366 inst_guid = vmbus_chan_guid_inst(channel); 1367 hbus->pci_domain = inst_guid->hv_guid[9] | 1368 (inst_guid->hv_guid[8] << 8); 1369 1370 mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF); 1371 mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF); 1372 TAILQ_INIT(&hbus->children); 1373 TAILQ_INIT(&hbus->dr_list); 1374 1375 hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 1376 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH, 1377 RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE)); 1378 1379 if (!hbus->cfg_res) { 1380 device_printf(dev, "failed to get resource for cfg window\n"); 1381 ret = ENXIO; 1382 goto free_bus; 1383 } 1384 1385 sc = device_get_softc(dev); 1386 sc->chan = channel; 1387 sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); 1388 sc->hbus = hbus; 1389 1390 /* 1391 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT 1392 * messages. NB: we can't handle the messages in the channel callback 1393 * directly, because the message handlers need to send new messages 1394 * to the host and waits for the host's completion messages, which 1395 * must also be handled by the channel callback. 
1396 */ 1397 sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK, 1398 taskqueue_thread_enqueue, &sc->taskq); 1399 taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq"); 1400 1401 hbus->sc = sc; 1402 1403 init_completion(&hbus->query_completion); 1404 hbus->query_comp = &hbus->query_completion; 1405 1406 ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size, 1407 NULL, 0, vmbus_pcib_on_channel_callback, sc); 1408 if (ret) 1409 goto free_res; 1410 1411 ret = hv_pci_protocol_negotiation(hbus); 1412 if (ret) 1413 goto vmbus_close; 1414 1415 ret = hv_pci_query_relations(hbus); 1416 if (ret) 1417 goto vmbus_close; 1418 wait_for_completion(hbus->query_comp); 1419 1420 ret = hv_pci_enter_d0(hbus); 1421 if (ret) 1422 goto vmbus_close; 1423 1424 ret = hv_send_resources_allocated(hbus); 1425 if (ret) 1426 goto vmbus_close; 1427 1428 hbus->pci_bus = device_add_child(dev, "pci", -1); 1429 if (!hbus->pci_bus) { 1430 device_printf(dev, "failed to create pci bus\n"); 1431 ret = ENXIO; 1432 goto vmbus_close; 1433 } 1434 1435 bus_generic_attach(dev); 1436 1437 hbus->state = hv_pcibus_installed; 1438 1439 return (0); 1440 1441 vmbus_close: 1442 vmbus_pcib_pre_detach(hbus); 1443 vmbus_chan_close(sc->chan); 1444 free_res: 1445 taskqueue_free(sc->taskq); 1446 free_completion(&hbus->query_completion); 1447 free(sc->rx_buf, M_DEVBUF); 1448 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1449 free_bus: 1450 mtx_destroy(&hbus->device_list_lock); 1451 mtx_destroy(&hbus->config_lock); 1452 free(hbus, M_DEVBUF); 1453 return (ret); 1454 } 1455 1456 /* 1457 * Standard detach entry point 1458 */ 1459 static int 1460 vmbus_pcib_detach(device_t dev) 1461 { 1462 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1463 struct hv_pcibus *hbus = sc->hbus; 1464 struct pci_message teardown_packet; 1465 struct pci_bus_relations relations; 1466 int ret; 1467 1468 vmbus_pcib_pre_detach(hbus); 1469 1470 if (hbus->state == hv_pcibus_installed) 1471 bus_generic_detach(dev); 
1472 1473 /* Delete any children which might still exist. */ 1474 memset(&relations, 0, sizeof(relations)); 1475 hv_pci_devices_present(hbus, &relations); 1476 1477 ret = hv_send_resources_released(hbus); 1478 if (ret) 1479 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n"); 1480 1481 teardown_packet.type = PCI_BUS_D0EXIT; 1482 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1483 &teardown_packet, sizeof(struct pci_message), 0); 1484 if (ret) 1485 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n"); 1486 1487 taskqueue_drain_all(hbus->sc->taskq); 1488 vmbus_chan_close(sc->chan); 1489 taskqueue_free(sc->taskq); 1490 1491 free_completion(&hbus->query_completion); 1492 free(sc->rx_buf, M_DEVBUF); 1493 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1494 1495 mtx_destroy(&hbus->device_list_lock); 1496 mtx_destroy(&hbus->config_lock); 1497 free(hbus, M_DEVBUF); 1498 1499 return (0); 1500 } 1501 1502 static int 1503 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val) 1504 { 1505 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1506 1507 switch (which) { 1508 case PCIB_IVAR_DOMAIN: 1509 *val = sc->hbus->pci_domain; 1510 return (0); 1511 1512 case PCIB_IVAR_BUS: 1513 /* There is only bus 0. 
*/ 1514 *val = 0; 1515 return (0); 1516 } 1517 return (ENOENT); 1518 } 1519 1520 static int 1521 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val) 1522 { 1523 return (ENOENT); 1524 } 1525 1526 static struct resource * 1527 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, 1528 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) 1529 { 1530 unsigned int bar_no; 1531 struct hv_pci_dev *hpdev; 1532 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1533 struct resource *res; 1534 unsigned int devfn; 1535 1536 if (type == PCI_RES_BUS) 1537 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid, 1538 start, end, count, flags)); 1539 1540 /* Devices with port I/O BAR are not supported. */ 1541 if (type == SYS_RES_IOPORT) 1542 return (NULL); 1543 1544 if (type == SYS_RES_MEMORY) { 1545 devfn = PCI_DEVFN(pci_get_slot(child), 1546 pci_get_function(child)); 1547 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1548 if (!hpdev) 1549 return (NULL); 1550 1551 bar_no = PCI_RID2BAR(*rid); 1552 if (bar_no >= MAX_NUM_BARS) 1553 return (NULL); 1554 1555 /* Make sure a 32-bit BAR gets a 32-bit address */ 1556 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64)) 1557 end = ulmin(end, 0xFFFFFFFF); 1558 } 1559 1560 res = bus_generic_alloc_resource(dev, child, type, rid, 1561 start, end, count, flags); 1562 /* 1563 * If this is a request for a specific range, assume it is 1564 * correct and pass it up to the parent. 
1565 */ 1566 if (res == NULL && start + count - 1 == end) 1567 res = bus_generic_alloc_resource(dev, child, type, rid, 1568 start, end, count, flags); 1569 return (res); 1570 } 1571 1572 static int 1573 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid, 1574 struct resource *r) 1575 { 1576 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1577 1578 if (type == PCI_RES_BUS) 1579 return (pci_domain_release_bus(sc->hbus->pci_domain, child, 1580 rid, r)); 1581 1582 if (type == SYS_RES_IOPORT) 1583 return (EINVAL); 1584 1585 return (bus_generic_release_resource(dev, child, type, rid, r)); 1586 } 1587 1588 #if __FreeBSD_version >= 1100000 1589 static int 1590 vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op, 1591 size_t setsize, cpuset_t *cpuset) 1592 { 1593 return (bus_get_cpus(pcib, op, setsize, cpuset)); 1594 } 1595 #endif 1596 1597 static uint32_t 1598 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, 1599 u_int reg, int bytes) 1600 { 1601 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1602 struct hv_pci_dev *hpdev; 1603 unsigned int devfn = PCI_DEVFN(slot, func); 1604 uint32_t data = 0; 1605 1606 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1607 1608 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1609 if (!hpdev) 1610 return (~0); 1611 1612 _hv_pcifront_read_config(hpdev, reg, bytes, &data); 1613 1614 return (data); 1615 } 1616 1617 static void 1618 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, 1619 u_int reg, uint32_t data, int bytes) 1620 { 1621 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1622 struct hv_pci_dev *hpdev; 1623 unsigned int devfn = PCI_DEVFN(slot, func); 1624 1625 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1626 1627 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1628 if (!hpdev) 1629 return; 1630 1631 _hv_pcifront_write_config(hpdev, reg, bytes, data); 1632 } 1633 1634 static int 1635 
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin) 1636 { 1637 /* We only support MSI/MSI-X and don't support INTx interrupt. */ 1638 return (PCI_INVALID_IRQ); 1639 } 1640 1641 static int 1642 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count, 1643 int maxcount, int *irqs) 1644 { 1645 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount, 1646 irqs)); 1647 } 1648 1649 static int 1650 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) 1651 { 1652 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs)); 1653 } 1654 1655 static int 1656 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) 1657 { 1658 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq)); 1659 } 1660 1661 static int 1662 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq) 1663 { 1664 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq)); 1665 } 1666 1667 #define MSI_INTEL_ADDR_DEST 0x000ff000 1668 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. 
*/ 1669 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED 1670 1671 static int 1672 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq, 1673 uint64_t *addr, uint32_t *data) 1674 { 1675 unsigned int devfn; 1676 struct hv_pci_dev *hpdev; 1677 1678 uint64_t v_addr; 1679 uint32_t v_data; 1680 struct hv_irq_desc *hid, *tmp_hid; 1681 unsigned int cpu, vcpu_id; 1682 unsigned int vector; 1683 1684 struct vmbus_pcib_softc *sc = device_get_softc(pcib); 1685 struct pci_create_interrupt *int_pkt; 1686 struct compose_comp_ctxt comp; 1687 struct { 1688 struct pci_packet pkt; 1689 uint8_t buffer[sizeof(struct pci_create_interrupt)]; 1690 } ctxt; 1691 1692 int ret; 1693 1694 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); 1695 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1696 if (!hpdev) 1697 return (ENOENT); 1698 1699 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq, 1700 &v_addr, &v_data); 1701 if (ret) 1702 return (ret); 1703 1704 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) { 1705 if (hid->irq == irq) { 1706 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link); 1707 hv_int_desc_free(hpdev, hid); 1708 break; 1709 } 1710 } 1711 1712 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12; 1713 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); 1714 vector = v_data & MSI_INTEL_DATA_INTVEC; 1715 1716 init_completion(&comp.comp_pkt.host_event); 1717 1718 memset(&ctxt, 0, sizeof(ctxt)); 1719 ctxt.pkt.completion_func = hv_pci_compose_compl; 1720 ctxt.pkt.compl_ctxt = ∁ 1721 1722 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message; 1723 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; 1724 int_pkt->wslot.val = hpdev->desc.wslot.val; 1725 int_pkt->int_desc.vector = vector; 1726 int_pkt->int_desc.vector_count = 1; 1727 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; 1728 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id; 1729 1730 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1731 
VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt), 1732 (uint64_t)(uintptr_t)&ctxt.pkt); 1733 if (ret) { 1734 free_completion(&comp.comp_pkt.host_event); 1735 return (ret); 1736 } 1737 1738 wait_for_completion(&comp.comp_pkt.host_event); 1739 free_completion(&comp.comp_pkt.host_event); 1740 1741 if (comp.comp_pkt.completion_status < 0) 1742 return (EPROTO); 1743 1744 *addr = comp.int_desc.address; 1745 *data = comp.int_desc.data; 1746 1747 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO); 1748 hid->irq = irq; 1749 hid->desc = comp.int_desc; 1750 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link); 1751 1752 return (0); 1753 } 1754 1755 static device_method_t vmbus_pcib_methods[] = { 1756 /* Device interface */ 1757 DEVMETHOD(device_probe, vmbus_pcib_probe), 1758 DEVMETHOD(device_attach, vmbus_pcib_attach), 1759 DEVMETHOD(device_detach, vmbus_pcib_detach), 1760 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1761 DEVMETHOD(device_suspend, bus_generic_suspend), 1762 DEVMETHOD(device_resume, bus_generic_resume), 1763 1764 /* Bus interface */ 1765 DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar), 1766 DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar), 1767 DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource), 1768 DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource), 1769 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), 1770 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), 1771 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), 1772 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), 1773 #if __FreeBSD_version >= 1100000 1774 DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus), 1775 #endif 1776 1777 /* pcib interface */ 1778 DEVMETHOD(pcib_maxslots, pcib_maxslots), 1779 DEVMETHOD(pcib_read_config, vmbus_pcib_read_config), 1780 DEVMETHOD(pcib_write_config, vmbus_pcib_write_config), 1781 DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr), 1782 DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi), 1783 
DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi), 1784 DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix), 1785 DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix), 1786 DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi), 1787 DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), 1788 1789 DEVMETHOD_END 1790 }; 1791 1792 static devclass_t pcib_devclass; 1793 1794 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods, 1795 sizeof(struct vmbus_pcib_softc)); 1796 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0); 1797 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1); 1798 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1); 1799 1800 #endif /* NEW_PCIB */ 1801