1 /*- 2 * Copyright (c) 2016 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #ifdef NEW_PCIB 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/types.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/kernel.h> 38 #include <sys/queue.h> 39 #include <sys/lock.h> 40 #include <sys/sx.h> 41 #include <sys/smp.h> 42 #include <sys/sysctl.h> 43 #include <sys/bus.h> 44 #include <sys/rman.h> 45 #include <sys/mutex.h> 46 #include <sys/errno.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_param.h> 50 #include <vm/vm_kern.h> 51 #include <vm/pmap.h> 52 53 #include <machine/atomic.h> 54 #include <machine/bus.h> 55 #include <machine/frame.h> 56 #include <machine/pci_cfgreg.h> 57 #include <machine/resource.h> 58 59 #include <sys/pciio.h> 60 #include <dev/pci/pcireg.h> 61 #include <dev/pci/pcivar.h> 62 #include <dev/pci/pci_private.h> 63 #include <dev/pci/pcib_private.h> 64 #include "pcib_if.h" 65 66 #include <machine/intr_machdep.h> 67 #include <x86/apicreg.h> 68 69 #include <dev/hyperv/include/hyperv.h> 70 #include <dev/hyperv/include/hyperv_busdma.h> 71 #include <dev/hyperv/include/vmbus_xact.h> 72 #include <dev/hyperv/vmbus/vmbus_reg.h> 73 #include <dev/hyperv/vmbus/vmbus_chanvar.h> 74 75 #include "vmbus_if.h" 76 77 #if __FreeBSD_version < 1100000 78 typedef u_long rman_res_t; 79 #define RM_MAX_END (~(rman_res_t)0) 80 #endif 81 82 struct completion { 83 unsigned int done; 84 struct mtx lock; 85 }; 86 87 static void 88 init_completion(struct completion *c) 89 { 90 memset(c, 0, sizeof(*c)); 91 mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF); 92 c->done = 0; 93 } 94 95 static void 96 free_completion(struct completion *c) 97 { 98 mtx_destroy(&c->lock); 99 } 100 101 static void 102 complete(struct completion *c) 103 { 104 mtx_lock(&c->lock); 105 c->done++; 106 mtx_unlock(&c->lock); 107 wakeup(c); 108 } 109 110 static void 111 wait_for_completion(struct completion *c) 112 { 113 mtx_lock(&c->lock); 114 while (c->done == 0) 115 mtx_sleep(c, &c->lock, 0, "hvwfc", 0); 116 c->done--; 117 mtx_unlock(&c->lock); 118 } 119 120 #define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (major))) 121 122 enum { 123 PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), 124 PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1 125 }; 126 127 #define PCI_CONFIG_MMIO_LENGTH 0x2000 128 #define CFG_PAGE_OFFSET 0x1000 129 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) 130 131 /* 132 * Message Types 133 */ 134 135 enum pci_message_type { 136 /* 137 * Version 1.1 138 */ 139 PCI_MESSAGE_BASE = 0x42490000, 140 PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0, 141 PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1, 142 PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4, 143 PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5, 144 PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6, 145 PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7, 146 PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8, 147 PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9, 148 PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA, 149 PCI_EJECT = PCI_MESSAGE_BASE + 0xB, 150 PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC, 151 PCI_REENABLE = PCI_MESSAGE_BASE + 0xD, 152 PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE, 153 PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF, 154 PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10, 155 PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11, 156 PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12, 157 PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13, 158 PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14, 159 PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15, 160 PCI_MESSAGE_MAXIMUM 161 }; 162 163 /* 164 * Structures defining the virtual PCI Express protocol. 165 */ 166 167 union pci_version { 168 struct { 169 uint16_t minor_version; 170 uint16_t major_version; 171 } parts; 172 uint32_t version; 173 } __packed; 174 175 /* 176 * This representation is the one used in Windows, which is 177 * what is expected when sending this back and forth with 178 * the Hyper-V parent partition. 179 */ 180 union win_slot_encoding { 181 struct { 182 uint32_t slot:5; 183 uint32_t func:3; 184 uint32_t reserved:24; 185 } bits; 186 uint32_t val; 187 } __packed; 188 189 struct pci_func_desc { 190 uint16_t v_id; /* vendor ID */ 191 uint16_t d_id; /* device ID */ 192 uint8_t rev; 193 uint8_t prog_intf; 194 uint8_t subclass; 195 uint8_t base_class; 196 uint32_t subsystem_id; 197 union win_slot_encoding wslot; 198 uint32_t ser; /* serial number */ 199 } __packed; 200 201 struct hv_msi_desc { 202 uint8_t vector; 203 uint8_t delivery_mode; 204 uint16_t vector_count; 205 uint32_t reserved; 206 uint64_t cpu_mask; 207 } __packed; 208 209 struct tran_int_desc { 210 uint16_t reserved; 211 uint16_t vector_count; 212 uint32_t data; 213 uint64_t address; 214 } __packed; 215 216 struct pci_message { 217 uint32_t type; 218 } __packed; 219 220 struct pci_child_message { 221 struct pci_message message_type; 222 union win_slot_encoding wslot; 223 } __packed; 224 225 struct pci_incoming_message { 226 struct vmbus_chanpkt_hdr hdr; 227 struct pci_message message_type; 228 } __packed; 229 230 struct pci_response { 231 struct vmbus_chanpkt_hdr hdr; 232 int32_t status; /* negative values are failures */ 233 } __packed; 234 235 struct pci_packet { 236 void (*completion_func)(void *context, struct pci_response *resp, 237 int resp_packet_size); 238 void *compl_ctxt; 239 240 struct pci_message message[0]; 241 }; 242 243 /* 244 * Specific message types supporting the PCI protocol. 245 */ 246 247 struct pci_version_request { 248 struct pci_message message_type; 249 uint32_t protocol_version; 250 uint32_t is_last_attempt:1; 251 uint32_t reservedz:31; 252 } __packed; 253 254 struct pci_bus_d0_entry { 255 struct pci_message message_type; 256 uint32_t reserved; 257 uint64_t mmio_base; 258 } __packed; 259 260 struct pci_bus_relations { 261 struct pci_incoming_message incoming; 262 uint32_t device_count; 263 struct pci_func_desc func[0]; 264 } __packed; 265 266 #define MAX_NUM_BARS (PCIR_MAX_BAR_0 + 1) 267 struct pci_q_res_req_response { 268 struct vmbus_chanpkt_hdr hdr; 269 int32_t status; /* negative values are failures */ 270 uint32_t probed_bar[MAX_NUM_BARS]; 271 } __packed; 272 273 struct pci_resources_assigned { 274 struct pci_message message_type; 275 union win_slot_encoding wslot; 276 uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */ 277 uint32_t msi_descriptors; 278 uint32_t reserved[4]; 279 } __packed; 280 281 struct pci_create_interrupt { 282 struct pci_message message_type; 283 union win_slot_encoding wslot; 284 struct hv_msi_desc int_desc; 285 } __packed; 286 287 struct pci_create_int_response { 288 struct pci_response response; 289 uint32_t reserved; 290 struct tran_int_desc int_desc; 291 } __packed; 292 293 struct pci_delete_interrupt { 294 struct pci_message message_type; 295 union win_slot_encoding wslot; 296 struct tran_int_desc int_desc; 297 } __packed; 298 299 struct pci_dev_incoming { 300 struct pci_incoming_message incoming; 301 union win_slot_encoding wslot; 302 } __packed; 303 304 struct pci_eject_response { 305 struct pci_message message_type; 306 union win_slot_encoding wslot; 307 uint32_t status; 308 } __packed; 309 310 /* 311 * Driver specific state. 312 */ 313 314 enum hv_pcibus_state { 315 hv_pcibus_init = 0, 316 hv_pcibus_installed, 317 }; 318 319 struct hv_pcibus { 320 device_t pcib; 321 device_t pci_bus; 322 struct vmbus_pcib_softc *sc; 323 324 uint16_t pci_domain; 325 326 enum hv_pcibus_state state; 327 328 struct resource *cfg_res; 329 330 struct completion query_completion, *query_comp; 331 332 struct mtx config_lock; /* Avoid two threads writing index page */ 333 struct mtx device_list_lock; /* Protect lists below */ 334 TAILQ_HEAD(, hv_pci_dev) children; 335 TAILQ_HEAD(, hv_dr_state) dr_list; 336 337 volatile int detaching; 338 }; 339 340 struct hv_pci_dev { 341 TAILQ_ENTRY(hv_pci_dev) link; 342 343 struct pci_func_desc desc; 344 345 bool reported_missing; 346 347 struct hv_pcibus *hbus; 348 struct task eject_task; 349 350 TAILQ_HEAD(, hv_irq_desc) irq_desc_list; 351 352 /* 353 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then 354 * read it back, for each of the BAR offsets within config space. 355 */ 356 uint32_t probed_bar[MAX_NUM_BARS]; 357 }; 358 359 /* 360 * Tracks "Device Relations" messages from the host, which must be both 361 * processed in order. 362 */ 363 struct hv_dr_work { 364 struct task task; 365 struct hv_pcibus *bus; 366 }; 367 368 struct hv_dr_state { 369 TAILQ_ENTRY(hv_dr_state) link; 370 uint32_t device_count; 371 struct pci_func_desc func[0]; 372 }; 373 374 struct hv_irq_desc { 375 TAILQ_ENTRY(hv_irq_desc) link; 376 struct tran_int_desc desc; 377 int irq; 378 }; 379 380 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) 381 #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) 382 #define PCI_FUNC(devfn) ((devfn) & 0x07) 383 384 static uint32_t 385 devfn_to_wslot(unsigned int devfn) 386 { 387 union win_slot_encoding wslot; 388 389 wslot.val = 0; 390 wslot.bits.slot = PCI_SLOT(devfn); 391 wslot.bits.func = PCI_FUNC(devfn); 392 393 return (wslot.val); 394 } 395 396 static unsigned int 397 wslot_to_devfn(uint32_t wslot) 398 { 399 union win_slot_encoding encoding; 400 unsigned int slot; 401 unsigned int func; 402 403 encoding.val = wslot; 404 405 slot = encoding.bits.slot; 406 func = encoding.bits.func; 407 408 return (PCI_DEVFN(slot, func)); 409 } 410 411 struct vmbus_pcib_softc { 412 struct vmbus_channel *chan; 413 void *rx_buf; 414 415 struct taskqueue *taskq; 416 417 struct hv_pcibus *hbus; 418 }; 419 420 /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */ 421 static const struct hyperv_guid g_pass_through_dev_type = { 422 .hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, 423 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F} 424 }; 425 426 struct hv_pci_compl { 427 struct completion host_event; 428 int32_t completion_status; 429 }; 430 431 struct q_res_req_compl { 432 struct completion host_event; 433 struct hv_pci_dev *hpdev; 434 }; 435 436 struct compose_comp_ctxt { 437 struct hv_pci_compl comp_pkt; 438 struct tran_int_desc int_desc; 439 }; 440 441 static void 442 hv_pci_generic_compl(void *context, struct pci_response *resp, 443 int resp_packet_size) 444 { 445 struct hv_pci_compl *comp_pkt = context; 446 447 if (resp_packet_size >= sizeof(struct pci_response)) 448 comp_pkt->completion_status = resp->status; 449 else 450 comp_pkt->completion_status = -1; 451 452 complete(&comp_pkt->host_event); 453 } 454 455 static void 456 q_resource_requirements(void *context, struct pci_response *resp, 457 int resp_packet_size) 458 { 459 struct q_res_req_compl *completion = context; 460 struct pci_q_res_req_response *q_res_req = 461 (struct pci_q_res_req_response *)resp; 462 int i; 463 464 if (resp->status < 0) { 465 printf("vmbus_pcib: failed to query resource requirements\n"); 466 } else { 467 for (i = 0; i < MAX_NUM_BARS; i++) 468 completion->hpdev->probed_bar[i] = 469 q_res_req->probed_bar[i]; 470 } 471 472 complete(&completion->host_event); 473 } 474 475 static void 476 hv_pci_compose_compl(void *context, struct pci_response *resp, 477 int resp_packet_size) 478 { 479 struct compose_comp_ctxt *comp_pkt = context; 480 struct pci_create_int_response *int_resp = 481 (struct pci_create_int_response *)resp; 482 483 comp_pkt->comp_pkt.completion_status = resp->status; 484 comp_pkt->int_desc = int_resp->int_desc; 485 complete(&comp_pkt->comp_pkt.host_event); 486 } 487 488 static void 489 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid) 490 { 491 struct pci_delete_interrupt *int_pkt; 492 struct { 493 struct pci_packet pkt; 494 uint8_t buffer[sizeof(struct pci_delete_interrupt)]; 495 } ctxt; 496 497 memset(&ctxt, 0, sizeof(ctxt)); 498 int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; 499 int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE; 500 int_pkt->wslot.val = hpdev->desc.wslot.val; 501 int_pkt->int_desc = hid->desc; 502 503 vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 504 int_pkt, sizeof(*int_pkt), 0); 505 506 free(hid, M_DEVBUF); 507 } 508 509 static void 510 hv_pci_delete_device(struct hv_pci_dev *hpdev) 511 { 512 struct hv_pcibus *hbus = hpdev->hbus; 513 struct hv_irq_desc *hid, *tmp_hid; 514 device_t pci_dev; 515 int devfn; 516 517 devfn = wslot_to_devfn(hpdev->desc.wslot.val); 518 519 mtx_lock(&Giant); 520 521 pci_dev = pci_find_dbsf(hbus->pci_domain, 522 0, PCI_SLOT(devfn), PCI_FUNC(devfn)); 523 if (pci_dev) 524 device_delete_child(hbus->pci_bus, pci_dev); 525 526 mtx_unlock(&Giant); 527 528 mtx_lock(&hbus->device_list_lock); 529 TAILQ_REMOVE(&hbus->children, hpdev, link); 530 mtx_unlock(&hbus->device_list_lock); 531 532 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) 533 hv_int_desc_free(hpdev, hid); 534 535 free(hpdev, M_DEVBUF); 536 } 537 538 static struct hv_pci_dev * 539 new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc) 540 { 541 struct hv_pci_dev *hpdev; 542 struct pci_child_message *res_req; 543 struct q_res_req_compl comp_pkt; 544 struct { 545 struct pci_packet pkt; 546 uint8_t buffer[sizeof(struct pci_child_message)]; 547 } ctxt; 548 int ret; 549 550 hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO); 551 hpdev->hbus = hbus; 552 553 TAILQ_INIT(&hpdev->irq_desc_list); 554 555 init_completion(&comp_pkt.host_event); 556 comp_pkt.hpdev = hpdev; 557 558 ctxt.pkt.compl_ctxt = &comp_pkt; 559 ctxt.pkt.completion_func = q_resource_requirements; 560 561 res_req = (struct pci_child_message *)&ctxt.pkt.message; 562 res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS; 563 res_req->wslot.val = desc->wslot.val; 564 565 ret = vmbus_chan_send(hbus->sc->chan, 566 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 567 res_req, sizeof(*res_req), (uint64_t)&ctxt.pkt); 568 if (ret) 569 goto err; 570 571 wait_for_completion(&comp_pkt.host_event); 572 free_completion(&comp_pkt.host_event); 573 574 hpdev->desc = *desc; 575 576 mtx_lock(&hbus->device_list_lock); 577 if (TAILQ_EMPTY(&hbus->children)) 578 hbus->pci_domain = desc->ser & 0xFFFF; 579 TAILQ_INSERT_TAIL(&hbus->children, hpdev, link); 580 mtx_unlock(&hbus->device_list_lock); 581 return (hpdev); 582 err: 583 free_completion(&comp_pkt.host_event); 584 free(hpdev, M_DEVBUF); 585 return (NULL); 586 } 587 588 #if __FreeBSD_version < 1100000 589 590 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */ 591 592 static struct pci_devinfo * 593 pci_identify_function(device_t pcib, device_t dev, int domain, int busno, 594 int slot, int func, size_t dinfo_size) 595 { 596 struct pci_devinfo *dinfo; 597 598 dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size); 599 if (dinfo != NULL) 600 pci_add_child(dev, dinfo); 601 602 return (dinfo); 603 } 604 605 static int 606 pci_rescan(device_t dev) 607 { 608 #define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w) 609 device_t pcib = device_get_parent(dev); 610 struct pci_softc *sc; 611 device_t child, *devlist, *unchanged; 612 int devcount, error, i, j, maxslots, oldcount; 613 int busno, domain, s, f, pcifunchigh; 614 uint8_t hdrtype; 615 616 /* No need to check for ARI on a rescan. */ 617 error = device_get_children(dev, &devlist, &devcount); 618 if (error) 619 return (error); 620 if (devcount != 0) { 621 unchanged = malloc(devcount * sizeof(device_t), M_TEMP, 622 M_NOWAIT | M_ZERO); 623 if (unchanged == NULL) { 624 free(devlist, M_TEMP); 625 return (ENOMEM); 626 } 627 } else 628 unchanged = NULL; 629 630 sc = device_get_softc(dev); 631 domain = pcib_get_domain(dev); 632 busno = pcib_get_bus(dev); 633 maxslots = PCIB_MAXSLOTS(pcib); 634 for (s = 0; s <= maxslots; s++) { 635 /* If function 0 is not present, skip to the next slot. */ 636 f = 0; 637 if (REG(PCIR_VENDOR, 2) == 0xffff) 638 continue; 639 pcifunchigh = 0; 640 hdrtype = REG(PCIR_HDRTYPE, 1); 641 if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) 642 continue; 643 if (hdrtype & PCIM_MFDEV) 644 pcifunchigh = PCIB_MAXFUNCS(pcib); 645 for (f = 0; f <= pcifunchigh; f++) { 646 if (REG(PCIR_VENDOR, 2) == 0xffff) 647 continue; 648 649 /* 650 * Found a valid function. Check if a 651 * device_t for this device already exists. 652 */ 653 for (i = 0; i < devcount; i++) { 654 child = devlist[i]; 655 if (child == NULL) 656 continue; 657 if (pci_get_slot(child) == s && 658 pci_get_function(child) == f) { 659 unchanged[i] = child; 660 goto next_func; 661 } 662 } 663 664 pci_identify_function(pcib, dev, domain, busno, s, f, 665 sizeof(struct pci_devinfo)); 666 next_func:; 667 } 668 } 669 670 /* Remove devices that are no longer present. */ 671 for (i = 0; i < devcount; i++) { 672 if (unchanged[i] != NULL) 673 continue; 674 device_delete_child(dev, devlist[i]); 675 } 676 677 free(devlist, M_TEMP); 678 oldcount = devcount; 679 680 /* Try to attach the devices just added. */ 681 error = device_get_children(dev, &devlist, &devcount); 682 if (error) { 683 free(unchanged, M_TEMP); 684 return (error); 685 } 686 687 for (i = 0; i < devcount; i++) { 688 for (j = 0; j < oldcount; j++) { 689 if (devlist[i] == unchanged[j]) 690 goto next_device; 691 } 692 693 device_probe_and_attach(devlist[i]); 694 next_device:; 695 } 696 697 free(unchanged, M_TEMP); 698 free(devlist, M_TEMP); 699 return (0); 700 #undef REG 701 } 702 703 #else 704 705 static int 706 pci_rescan(device_t dev) 707 { 708 return (BUS_RESCAN(dev)); 709 } 710 711 #endif 712 713 static void 714 pci_devices_present_work(void *arg, int pending __unused) 715 { 716 struct hv_dr_work *dr_wrk = arg; 717 struct hv_dr_state *dr = NULL; 718 struct hv_pcibus *hbus; 719 uint32_t child_no; 720 bool found; 721 struct pci_func_desc *new_desc; 722 struct hv_pci_dev *hpdev, *tmp_hpdev; 723 struct completion *query_comp; 724 bool need_rescan = false; 725 726 hbus = dr_wrk->bus; 727 free(dr_wrk, M_DEVBUF); 728 729 /* Pull this off the queue and process it if it was the last one. */ 730 mtx_lock(&hbus->device_list_lock); 731 while (!TAILQ_EMPTY(&hbus->dr_list)) { 732 dr = TAILQ_FIRST(&hbus->dr_list); 733 TAILQ_REMOVE(&hbus->dr_list, dr, link); 734 735 /* Throw this away if the list still has stuff in it. */ 736 if (!TAILQ_EMPTY(&hbus->dr_list)) { 737 free(dr, M_DEVBUF); 738 continue; 739 } 740 } 741 mtx_unlock(&hbus->device_list_lock); 742 743 if (!dr) 744 return; 745 746 /* First, mark all existing children as reported missing. */ 747 mtx_lock(&hbus->device_list_lock); 748 TAILQ_FOREACH(hpdev, &hbus->children, link) 749 hpdev->reported_missing = true; 750 mtx_unlock(&hbus->device_list_lock); 751 752 /* Next, add back any reported devices. */ 753 for (child_no = 0; child_no < dr->device_count; child_no++) { 754 found = false; 755 new_desc = &dr->func[child_no]; 756 757 mtx_lock(&hbus->device_list_lock); 758 TAILQ_FOREACH(hpdev, &hbus->children, link) { 759 if ((hpdev->desc.wslot.val == 760 new_desc->wslot.val) && 761 (hpdev->desc.v_id == new_desc->v_id) && 762 (hpdev->desc.d_id == new_desc->d_id) && 763 (hpdev->desc.ser == new_desc->ser)) { 764 hpdev->reported_missing = false; 765 found = true; 766 break; 767 } 768 } 769 mtx_unlock(&hbus->device_list_lock); 770 771 if (!found) { 772 if (!need_rescan) 773 need_rescan = true; 774 775 hpdev = new_pcichild_device(hbus, new_desc); 776 if (!hpdev) 777 printf("vmbus_pcib: failed to add a child\n"); 778 } 779 } 780 781 /* Remove missing device(s), if any */ 782 TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) { 783 if (hpdev->reported_missing) 784 hv_pci_delete_device(hpdev); 785 } 786 787 /* Rescan the bus to find any new device, if necessary. */ 788 if (hbus->state == hv_pcibus_installed && need_rescan) 789 pci_rescan(hbus->pci_bus); 790 791 /* Wake up hv_pci_query_relations(), if it's waiting. */ 792 query_comp = hbus->query_comp; 793 if (query_comp) { 794 hbus->query_comp = NULL; 795 complete(query_comp); 796 } 797 798 free(dr, M_DEVBUF); 799 } 800 801 static struct hv_pci_dev * 802 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot) 803 { 804 struct hv_pci_dev *hpdev, *ret = NULL; 805 806 mtx_lock(&hbus->device_list_lock); 807 TAILQ_FOREACH(hpdev, &hbus->children, link) { 808 if (hpdev->desc.wslot.val == wslot) { 809 ret = hpdev; 810 break; 811 } 812 } 813 mtx_unlock(&hbus->device_list_lock); 814 815 return (ret); 816 } 817 818 static void 819 hv_pci_devices_present(struct hv_pcibus *hbus, 820 struct pci_bus_relations *relations) 821 { 822 struct hv_dr_state *dr; 823 struct hv_dr_work *dr_wrk; 824 unsigned long dr_size; 825 826 if (hbus->detaching && relations->device_count > 0) 827 return; 828 829 dr_size = offsetof(struct hv_dr_state, func) + 830 (sizeof(struct pci_func_desc) * relations->device_count); 831 dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO); 832 833 dr->device_count = relations->device_count; 834 if (dr->device_count != 0) 835 memcpy(dr->func, relations->func, 836 sizeof(struct pci_func_desc) * dr->device_count); 837 838 mtx_lock(&hbus->device_list_lock); 839 TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link); 840 mtx_unlock(&hbus->device_list_lock); 841 842 dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO); 843 dr_wrk->bus = hbus; 844 TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk); 845 taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task); 846 } 847 848 static void 849 hv_eject_device_work(void *arg, int pending __unused) 850 { 851 struct hv_pci_dev *hpdev = arg; 852 union win_slot_encoding wslot = hpdev->desc.wslot; 853 struct hv_pcibus *hbus = hpdev->hbus; 854 struct pci_eject_response *eject_pkt; 855 struct { 856 struct pci_packet pkt; 857 uint8_t buffer[sizeof(struct pci_eject_response)]; 858 } ctxt; 859 860 hv_pci_delete_device(hpdev); 861 862 memset(&ctxt, 0, sizeof(ctxt)); 863 eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message; 864 eject_pkt->message_type.type = PCI_EJECTION_COMPLETE; 865 eject_pkt->wslot.val = wslot.val; 866 vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 867 eject_pkt, sizeof(*eject_pkt), 0); 868 } 869 870 static void 871 hv_pci_eject_device(struct hv_pci_dev *hpdev) 872 { 873 struct hv_pcibus *hbus = hpdev->hbus; 874 struct taskqueue *taskq; 875 876 if (hbus->detaching) 877 return; 878 879 /* 880 * Push this task into the same taskqueue on which 881 * vmbus_pcib_attach() runs, so we're sure this task can't run 882 * concurrently with vmbus_pcib_attach(). 883 */ 884 TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev); 885 taskq = vmbus_chan_mgmt_tq(hbus->sc->chan); 886 taskqueue_enqueue(taskq, &hpdev->eject_task); 887 } 888 889 #define PCIB_PACKET_SIZE 0x100 890 891 static void 892 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg) 893 { 894 struct vmbus_pcib_softc *sc = arg; 895 struct hv_pcibus *hbus = sc->hbus; 896 897 void *buffer; 898 int bufferlen = PCIB_PACKET_SIZE; 899 900 struct pci_packet *comp_packet; 901 struct pci_response *response; 902 struct pci_incoming_message *new_msg; 903 struct pci_bus_relations *bus_rel; 904 struct pci_dev_incoming *dev_msg; 905 struct hv_pci_dev *hpdev; 906 907 buffer = sc->rx_buf; 908 do { 909 struct vmbus_chanpkt_hdr *pkt = buffer; 910 uint32_t bytes_rxed; 911 int ret; 912 913 bytes_rxed = bufferlen; 914 ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed); 915 916 if (ret == ENOBUFS) { 917 /* Handle large packet */ 918 if (bufferlen > PCIB_PACKET_SIZE) { 919 free(buffer, M_DEVBUF); 920 buffer = NULL; 921 } 922 923 /* alloc new buffer */ 924 buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO); 925 bufferlen = bytes_rxed; 926 927 continue; 928 } 929 930 if (ret != 0) { 931 /* ignore EIO or EAGAIN */ 932 break; 933 } 934 935 if (bytes_rxed <= sizeof(struct pci_response)) 936 continue; 937 938 switch (pkt->cph_type) { 939 case VMBUS_CHANPKT_TYPE_COMP: 940 comp_packet = (struct pci_packet *)pkt->cph_xactid; 941 response = (struct pci_response *)pkt; 942 comp_packet->completion_func(comp_packet->compl_ctxt, 943 response, bytes_rxed); 944 break; 945 case VMBUS_CHANPKT_TYPE_INBAND: 946 new_msg = (struct pci_incoming_message *)buffer; 947 948 switch (new_msg->message_type.type) { 949 case PCI_BUS_RELATIONS: 950 bus_rel = (struct pci_bus_relations *)buffer; 951 952 if (bus_rel->device_count == 0) 953 break; 954 955 if (bytes_rxed < 956 offsetof(struct pci_bus_relations, func) + 957 (sizeof(struct pci_func_desc) * 958 (bus_rel->device_count))) 959 break; 960 961 hv_pci_devices_present(hbus, bus_rel); 962 break; 963 964 case PCI_EJECT: 965 dev_msg = (struct pci_dev_incoming *)buffer; 966 hpdev = get_pcichild_wslot(hbus, 967 dev_msg->wslot.val); 968 969 if (hpdev) 970 hv_pci_eject_device(hpdev); 971 972 break; 973 default: 974 printf("vmbus_pcib: Unknown msg type 0x%x\n", 975 new_msg->message_type.type); 976 break; 977 } 978 break; 979 default: 980 printf("vmbus_pcib: Unknown VMBus msg type %hd\n", 981 pkt->cph_type); 982 break; 983 } 984 } while (1); 985 986 if (bufferlen > PCIB_PACKET_SIZE) 987 free(buffer, M_DEVBUF); 988 } 989 990 static int 991 hv_pci_protocol_negotiation(struct hv_pcibus *hbus) 992 { 993 struct pci_version_request *version_req; 994 struct hv_pci_compl comp_pkt; 995 struct { 996 struct pci_packet pkt; 997 uint8_t buffer[sizeof(struct pci_version_request)]; 998 } ctxt; 999 int ret; 1000 1001 init_completion(&comp_pkt.host_event); 1002 1003 ctxt.pkt.completion_func = hv_pci_generic_compl; 1004 ctxt.pkt.compl_ctxt = &comp_pkt; 1005 version_req = (struct pci_version_request *)&ctxt.pkt.message; 1006 version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; 1007 version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT; 1008 version_req->is_last_attempt = 1; 1009 1010 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1011 VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req), 1012 (uint64_t)&ctxt.pkt); 1013 if (ret) 1014 goto out; 1015 1016 wait_for_completion(&comp_pkt.host_event); 1017 1018 if (comp_pkt.completion_status < 0) { 1019 device_printf(hbus->pcib, 1020 "vmbus_pcib version negotiation failed: %x\n", 1021 comp_pkt.completion_status); 1022 ret = EPROTO; 1023 } else { 1024 ret = 0; 1025 } 1026 out: 1027 free_completion(&comp_pkt.host_event); 1028 return (ret); 1029 } 1030 1031 /* Ask the host to send along the list of child devices */ 1032 static int 1033 hv_pci_query_relations(struct hv_pcibus *hbus) 1034 { 1035 struct pci_message message; 1036 int ret; 1037 1038 message.type = PCI_QUERY_BUS_RELATIONS; 1039 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1040 &message, sizeof(message), 0); 1041 return (ret); 1042 } 1043 1044 static int 1045 hv_pci_enter_d0(struct hv_pcibus *hbus) 1046 { 1047 struct pci_bus_d0_entry *d0_entry; 1048 struct hv_pci_compl comp_pkt; 1049 struct { 1050 struct pci_packet pkt; 1051 uint8_t buffer[sizeof(struct pci_bus_d0_entry)]; 1052 } ctxt; 1053 int ret; 1054 1055 /* 1056 * Tell the host that the bus is ready to use, and moved into the 1057 * powered-on state. This includes telling the host which region 1058 * of memory-mapped I/O space has been chosen for configuration space 1059 * access. 1060 */ 1061 init_completion(&comp_pkt.host_event); 1062 1063 ctxt.pkt.completion_func = hv_pci_generic_compl; 1064 ctxt.pkt.compl_ctxt = &comp_pkt; 1065 1066 d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message; 1067 memset(d0_entry, 0, sizeof(*d0_entry)); 1068 d0_entry->message_type.type = PCI_BUS_D0ENTRY; 1069 d0_entry->mmio_base = rman_get_start(hbus->cfg_res); 1070 1071 ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1072 VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry), 1073 (uint64_t)&ctxt.pkt); 1074 if (ret) 1075 goto out; 1076 1077 wait_for_completion(&comp_pkt.host_event); 1078 1079 if (comp_pkt.completion_status < 0) { 1080 device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n"); 1081 ret = EPROTO; 1082 } else { 1083 ret = 0; 1084 } 1085 1086 out: 1087 free_completion(&comp_pkt.host_event); 1088 return (ret); 1089 } 1090 1091 /* 1092 * It looks this is only needed by Windows VM, but let's send the message too 1093 * just to make the host happy. 1094 */ 1095 static int 1096 hv_send_resources_allocated(struct hv_pcibus *hbus) 1097 { 1098 struct pci_resources_assigned *res_assigned; 1099 struct hv_pci_compl comp_pkt; 1100 struct hv_pci_dev *hpdev; 1101 struct pci_packet *pkt; 1102 uint32_t wslot; 1103 int ret = 0; 1104 1105 pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned), 1106 M_DEVBUF, M_WAITOK | M_ZERO); 1107 1108 for (wslot = 0; wslot < 256; wslot++) { 1109 hpdev = get_pcichild_wslot(hbus, wslot); 1110 if (!hpdev) 1111 continue; 1112 1113 init_completion(&comp_pkt.host_event); 1114 1115 memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned)); 1116 pkt->completion_func = hv_pci_generic_compl; 1117 pkt->compl_ctxt = &comp_pkt; 1118 1119 res_assigned = (struct pci_resources_assigned *)&pkt->message; 1120 res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED; 1121 res_assigned->wslot.val = hpdev->desc.wslot.val; 1122 1123 ret = vmbus_chan_send(hbus->sc->chan, 1124 VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, 1125 &pkt->message, sizeof(*res_assigned), (uint64_t)pkt); 1126 if (ret) { 1127 free_completion(&comp_pkt.host_event); 1128 break; 1129 } 1130 1131 wait_for_completion(&comp_pkt.host_event); 1132 free_completion(&comp_pkt.host_event); 1133 1134 if (comp_pkt.completion_status < 0) { 1135 ret = EPROTO; 1136 device_printf(hbus->pcib, 1137 "failed to send PCI_RESOURCES_ASSIGNED\n"); 1138 break; 1139 } 1140 } 1141 1142 free(pkt, M_DEVBUF); 1143 return (ret); 1144 } 1145 1146 static int 1147 hv_send_resources_released(struct hv_pcibus *hbus) 1148 { 1149 struct pci_child_message pkt; 1150 struct hv_pci_dev *hpdev; 1151 uint32_t wslot; 1152 int ret; 1153 1154 for (wslot = 0; wslot < 256; wslot++) { 1155 hpdev = get_pcichild_wslot(hbus, wslot); 1156 if (!hpdev) 1157 continue; 1158 1159 pkt.message_type.type = PCI_RESOURCES_RELEASED; 1160 pkt.wslot.val = hpdev->desc.wslot.val; 1161 1162 ret = vmbus_chan_send(hbus->sc->chan, 1163 VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0); 1164 if (ret) 1165 return (ret); 1166 } 1167 1168 return (0); 1169 } 1170 1171 #define hv_cfg_read(x, s) \ 1172 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \ 1173 bus_size_t offset) \ 1174 { \ 1175 return (bus_read_##s(bus->cfg_res, offset)); \ 1176 } 1177 1178 #define hv_cfg_write(x, s) \ 1179 static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \ 1180 bus_size_t offset, uint##x##_t val) \ 1181 { \ 1182 return (bus_write_##s(bus->cfg_res, offset, val)); \ 1183 } 1184 1185 hv_cfg_read(8, 1) 1186 hv_cfg_read(16, 2) 1187 hv_cfg_read(32, 4) 1188 1189 hv_cfg_write(8, 1) 1190 hv_cfg_write(16, 2) 1191 hv_cfg_write(32, 4) 1192 1193 static void 1194 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, 1195 uint32_t *val) 1196 { 1197 struct hv_pcibus *hbus = hpdev->hbus; 1198 bus_size_t addr = CFG_PAGE_OFFSET + where; 1199 1200 /* 1201 * If the attempt is to read the IDs or the ROM BAR, simulate that. 1202 */ 1203 if (where + size <= PCIR_COMMAND) { 1204 memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size); 1205 } else if (where >= PCIR_REVID && where + size <= 1206 PCIR_CACHELNSZ) { 1207 memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where - 1208 PCIR_REVID, size); 1209 } else if (where >= PCIR_SUBVEND_0 && where + size <= 1210 PCIR_BIOS) { 1211 memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where - 1212 PCIR_SUBVEND_0, size); 1213 } else if (where >= PCIR_BIOS && where + size <= 1214 PCIR_CAP_PTR) { 1215 /* ROM BARs are unimplemented */ 1216 *val = 0; 1217 } else if ((where >= PCIR_INTLINE && where + size <= 1218 PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) { 1219 /* 1220 * Interrupt Line and Interrupt PIN are hard-wired to zero 1221 * because this front-end only supports message-signaled 1222 * interrupts. 1223 */ 1224 *val = 0; 1225 } else if (where + size <= CFG_PAGE_SIZE) { 1226 mtx_lock(&hbus->config_lock); 1227 1228 /* Choose the function to be read. */ 1229 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1230 1231 /* Make sure the function was chosen before we start reading.*/ 1232 mb(); 1233 1234 /* Read from that function's config space. */ 1235 switch (size) { 1236 case 1: 1237 *((uint8_t *)val) = hv_cfg_read_1(hbus, addr); 1238 break; 1239 case 2: 1240 *((uint16_t *)val) = hv_cfg_read_2(hbus, addr); 1241 break; 1242 default: 1243 *((uint32_t *)val) = hv_cfg_read_4(hbus, addr); 1244 break; 1245 } 1246 /* 1247 * Make sure the write was done before we release the lock, 1248 * allowing consecutive reads/writes. 1249 */ 1250 mb(); 1251 1252 mtx_unlock(&hbus->config_lock); 1253 } else { 1254 /* Invalid config read: it's unlikely to reach here. */ 1255 memset(val, 0, size); 1256 } 1257 } 1258 1259 static void 1260 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, 1261 uint32_t val) 1262 { 1263 struct hv_pcibus *hbus = hpdev->hbus; 1264 bus_size_t addr = CFG_PAGE_OFFSET + where; 1265 1266 /* SSIDs and ROM BARs are read-only */ 1267 if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR) 1268 return; 1269 1270 if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) { 1271 mtx_lock(&hbus->config_lock); 1272 1273 /* Choose the function to be written. */ 1274 hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); 1275 1276 /* Make sure the function was chosen before we start writing.*/ 1277 wmb(); 1278 1279 /* Write to that function's config space. */ 1280 switch (size) { 1281 case 1: 1282 hv_cfg_write_1(hbus, addr, (uint8_t)val); 1283 break; 1284 case 2: 1285 hv_cfg_write_2(hbus, addr, (uint16_t)val); 1286 break; 1287 default: 1288 hv_cfg_write_4(hbus, addr, (uint32_t)val); 1289 break; 1290 } 1291 1292 /* 1293 * Make sure the write was done before we release the lock, 1294 * allowing consecutive reads/writes. 1295 */ 1296 mb(); 1297 1298 mtx_unlock(&hbus->config_lock); 1299 } else { 1300 /* Invalid config write: it's unlikely to reach here. */ 1301 return; 1302 } 1303 } 1304 1305 static void 1306 vmbus_pcib_set_detaching(void *arg, int pending __unused) 1307 { 1308 struct hv_pcibus *hbus = arg; 1309 1310 atomic_set_int(&hbus->detaching, 1); 1311 } 1312 1313 static void 1314 vmbus_pcib_pre_detach(struct hv_pcibus *hbus) 1315 { 1316 struct task task; 1317 1318 TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus); 1319 1320 /* 1321 * Make sure the channel callback won't push any possible new 1322 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq. 1323 */ 1324 vmbus_chan_run_task(hbus->sc->chan, &task); 1325 1326 taskqueue_drain_all(hbus->sc->taskq); 1327 } 1328 1329 1330 /* 1331 * Standard probe entry point. 1332 * 1333 */ 1334 static int 1335 vmbus_pcib_probe(device_t dev) 1336 { 1337 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 1338 &g_pass_through_dev_type) == 0) { 1339 device_set_desc(dev, "Hyper-V PCI Express Pass Through"); 1340 return (BUS_PROBE_DEFAULT); 1341 } 1342 return (ENXIO); 1343 } 1344 1345 /* 1346 * Standard attach entry point. 1347 * 1348 */ 1349 static int 1350 vmbus_pcib_attach(device_t dev) 1351 { 1352 const int pci_ring_size = (4 * PAGE_SIZE); 1353 const struct hyperv_guid *inst_guid; 1354 struct vmbus_channel *channel; 1355 struct vmbus_pcib_softc *sc; 1356 struct hv_pcibus *hbus; 1357 int rid = 0; 1358 int ret; 1359 1360 hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO); 1361 hbus->pcib = dev; 1362 1363 channel = vmbus_get_channel(dev); 1364 inst_guid = vmbus_chan_guid_inst(channel); 1365 hbus->pci_domain = inst_guid->hv_guid[9] | 1366 (inst_guid->hv_guid[8] << 8); 1367 1368 mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF); 1369 mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF); 1370 TAILQ_INIT(&hbus->children); 1371 TAILQ_INIT(&hbus->dr_list); 1372 1373 hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 1374 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH, 1375 RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE)); 1376 1377 if (!hbus->cfg_res) { 1378 device_printf(dev, "failed to get resource for cfg window\n"); 1379 ret = ENXIO; 1380 goto free_bus; 1381 } 1382 1383 sc = device_get_softc(dev); 1384 sc->chan = channel; 1385 sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); 1386 sc->hbus = hbus; 1387 1388 /* 1389 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT 1390 * messages. NB: we can't handle the messages in the channel callback 1391 * directly, because the message handlers need to send new messages 1392 * to the host and waits for the host's completion messages, which 1393 * must also be handled by the channel callback. 1394 */ 1395 sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK, 1396 taskqueue_thread_enqueue, &sc->taskq); 1397 taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq"); 1398 1399 hbus->sc = sc; 1400 1401 init_completion(&hbus->query_completion); 1402 hbus->query_comp = &hbus->query_completion; 1403 1404 ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size, 1405 NULL, 0, vmbus_pcib_on_channel_callback, sc); 1406 if (ret) 1407 goto free_res; 1408 1409 ret = hv_pci_protocol_negotiation(hbus); 1410 if (ret) 1411 goto vmbus_close; 1412 1413 ret = hv_pci_query_relations(hbus); 1414 if (ret) 1415 goto vmbus_close; 1416 wait_for_completion(hbus->query_comp); 1417 1418 ret = hv_pci_enter_d0(hbus); 1419 if (ret) 1420 goto vmbus_close; 1421 1422 ret = hv_send_resources_allocated(hbus); 1423 if (ret) 1424 goto vmbus_close; 1425 1426 hbus->pci_bus = device_add_child(dev, "pci", -1); 1427 if (!hbus->pci_bus) { 1428 device_printf(dev, "failed to create pci bus\n"); 1429 ret = ENXIO; 1430 goto vmbus_close; 1431 } 1432 1433 bus_generic_attach(dev); 1434 1435 hbus->state = hv_pcibus_installed; 1436 1437 return (0); 1438 1439 vmbus_close: 1440 vmbus_pcib_pre_detach(hbus); 1441 vmbus_chan_close(sc->chan); 1442 free_res: 1443 taskqueue_free(sc->taskq); 1444 free_completion(&hbus->query_completion); 1445 free(sc->rx_buf, M_DEVBUF); 1446 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1447 free_bus: 1448 mtx_destroy(&hbus->device_list_lock); 1449 mtx_destroy(&hbus->config_lock); 1450 free(hbus, M_DEVBUF); 1451 return (ret); 1452 } 1453 1454 /* 1455 * Standard detach entry point 1456 */ 1457 static int 1458 vmbus_pcib_detach(device_t dev) 1459 { 1460 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1461 struct hv_pcibus *hbus = sc->hbus; 1462 struct pci_message teardown_packet; 1463 struct pci_bus_relations relations; 1464 int ret; 1465 1466 vmbus_pcib_pre_detach(hbus); 1467 1468 if (hbus->state == hv_pcibus_installed) 1469 bus_generic_detach(dev); 1470 1471 /* Delete any children which might still exist. */ 1472 memset(&relations, 0, sizeof(relations)); 1473 hv_pci_devices_present(hbus, &relations); 1474 1475 ret = hv_send_resources_released(hbus); 1476 if (ret) 1477 device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n"); 1478 1479 teardown_packet.type = PCI_BUS_D0EXIT; 1480 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, 1481 &teardown_packet, sizeof(struct pci_message), 0); 1482 if (ret) 1483 device_printf(dev, "failed to send PCI_BUS_D0EXIT\n"); 1484 1485 taskqueue_drain_all(hbus->sc->taskq); 1486 vmbus_chan_close(sc->chan); 1487 taskqueue_free(sc->taskq); 1488 1489 free_completion(&hbus->query_completion); 1490 free(sc->rx_buf, M_DEVBUF); 1491 bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); 1492 1493 mtx_destroy(&hbus->device_list_lock); 1494 mtx_destroy(&hbus->config_lock); 1495 free(hbus, M_DEVBUF); 1496 1497 return (0); 1498 } 1499 1500 static int 1501 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val) 1502 { 1503 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1504 1505 switch (which) { 1506 case PCIB_IVAR_DOMAIN: 1507 *val = sc->hbus->pci_domain; 1508 return (0); 1509 1510 case PCIB_IVAR_BUS: 1511 /* There is only bus 0. */ 1512 *val = 0; 1513 return (0); 1514 } 1515 return (ENOENT); 1516 } 1517 1518 static int 1519 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val) 1520 { 1521 return (ENOENT); 1522 } 1523 1524 static struct resource * 1525 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, 1526 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) 1527 { 1528 unsigned int bar_no; 1529 struct hv_pci_dev *hpdev; 1530 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1531 struct resource *res; 1532 unsigned int devfn; 1533 1534 if (type == PCI_RES_BUS) 1535 return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid, 1536 start, end, count, flags)); 1537 1538 /* Devices with port I/O BAR are not supported. */ 1539 if (type == SYS_RES_IOPORT) 1540 return (NULL); 1541 1542 if (type == SYS_RES_MEMORY) { 1543 devfn = PCI_DEVFN(pci_get_slot(child), 1544 pci_get_function(child)); 1545 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1546 if (!hpdev) 1547 return (NULL); 1548 1549 bar_no = PCI_RID2BAR(*rid); 1550 if (bar_no >= MAX_NUM_BARS) 1551 return (NULL); 1552 1553 /* Make sure a 32-bit BAR gets a 32-bit address */ 1554 if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64)) 1555 end = ulmin(end, 0xFFFFFFFF); 1556 } 1557 1558 res = bus_generic_alloc_resource(dev, child, type, rid, 1559 start, end, count, flags); 1560 /* 1561 * If this is a request for a specific range, assume it is 1562 * correct and pass it up to the parent. 1563 */ 1564 if (res == NULL && start + count - 1 == end) 1565 res = bus_generic_alloc_resource(dev, child, type, rid, 1566 start, end, count, flags); 1567 return (res); 1568 } 1569 1570 static int 1571 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid, 1572 struct resource *r) 1573 { 1574 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1575 1576 if (type == PCI_RES_BUS) 1577 return (pci_domain_release_bus(sc->hbus->pci_domain, child, 1578 rid, r)); 1579 1580 if (type == SYS_RES_IOPORT) 1581 return (EINVAL); 1582 1583 return (bus_generic_release_resource(dev, child, type, rid, r)); 1584 } 1585 1586 #if __FreeBSD_version >= 1100000 1587 static int 1588 vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op, 1589 size_t setsize, cpuset_t *cpuset) 1590 { 1591 return (bus_get_cpus(pcib, op, setsize, cpuset)); 1592 } 1593 #endif 1594 1595 static uint32_t 1596 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, 1597 u_int reg, int bytes) 1598 { 1599 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1600 struct hv_pci_dev *hpdev; 1601 unsigned int devfn = PCI_DEVFN(slot, func); 1602 uint32_t data = 0; 1603 1604 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1605 1606 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1607 if (!hpdev) 1608 return (~0); 1609 1610 _hv_pcifront_read_config(hpdev, reg, bytes, &data); 1611 1612 return (data); 1613 } 1614 1615 static void 1616 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, 1617 u_int reg, uint32_t data, int bytes) 1618 { 1619 struct vmbus_pcib_softc *sc = device_get_softc(dev); 1620 struct hv_pci_dev *hpdev; 1621 unsigned int devfn = PCI_DEVFN(slot, func); 1622 1623 KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); 1624 1625 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1626 if (!hpdev) 1627 return; 1628 1629 _hv_pcifront_write_config(hpdev, reg, bytes, data); 1630 } 1631 1632 static int 1633 vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin) 1634 { 1635 /* We only support MSI/MSI-X and don't support INTx interrupt. */ 1636 return (PCI_INVALID_IRQ); 1637 } 1638 1639 static int 1640 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count, 1641 int maxcount, int *irqs) 1642 { 1643 return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount, 1644 irqs)); 1645 } 1646 1647 static int 1648 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) 1649 { 1650 return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs)); 1651 } 1652 1653 static int 1654 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) 1655 { 1656 return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq)); 1657 } 1658 1659 static int 1660 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq) 1661 { 1662 return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq)); 1663 } 1664 1665 #define MSI_INTEL_ADDR_DEST 0x000ff000 1666 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ 1667 #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED 1668 1669 static int 1670 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq, 1671 uint64_t *addr, uint32_t *data) 1672 { 1673 unsigned int devfn; 1674 struct hv_pci_dev *hpdev; 1675 1676 uint64_t v_addr; 1677 uint32_t v_data; 1678 struct hv_irq_desc *hid, *tmp_hid; 1679 unsigned int cpu, vcpu_id; 1680 unsigned int vector; 1681 1682 struct vmbus_pcib_softc *sc = device_get_softc(pcib); 1683 struct pci_create_interrupt *int_pkt; 1684 struct compose_comp_ctxt comp; 1685 struct { 1686 struct pci_packet pkt; 1687 uint8_t buffer[sizeof(struct pci_create_interrupt)]; 1688 } ctxt; 1689 1690 int ret; 1691 1692 devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); 1693 hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); 1694 if (!hpdev) 1695 return (ENOENT); 1696 1697 ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq, 1698 &v_addr, &v_data); 1699 if (ret) 1700 return (ret); 1701 1702 TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) { 1703 if (hid->irq == irq) { 1704 TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link); 1705 hv_int_desc_free(hpdev, hid); 1706 break; 1707 } 1708 } 1709 1710 cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12; 1711 vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); 1712 vector = v_data & MSI_INTEL_DATA_INTVEC; 1713 1714 init_completion(&comp.comp_pkt.host_event); 1715 1716 memset(&ctxt, 0, sizeof(ctxt)); 1717 ctxt.pkt.completion_func = hv_pci_compose_compl; 1718 ctxt.pkt.compl_ctxt = ∁ 1719 1720 int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message; 1721 int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; 1722 int_pkt->wslot.val = hpdev->desc.wslot.val; 1723 int_pkt->int_desc.vector = vector; 1724 int_pkt->int_desc.vector_count = 1; 1725 int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; 1726 int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id; 1727 1728 ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 1729 VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt), 1730 (uint64_t)&ctxt.pkt); 1731 if (ret) { 1732 free_completion(&comp.comp_pkt.host_event); 1733 return (ret); 1734 } 1735 1736 wait_for_completion(&comp.comp_pkt.host_event); 1737 free_completion(&comp.comp_pkt.host_event); 1738 1739 if (comp.comp_pkt.completion_status < 0) 1740 return (EPROTO); 1741 1742 *addr = comp.int_desc.address; 1743 *data = comp.int_desc.data; 1744 1745 hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO); 1746 hid->irq = irq; 1747 hid->desc = comp.int_desc; 1748 TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link); 1749 1750 return (0); 1751 } 1752 1753 static device_method_t vmbus_pcib_methods[] = { 1754 /* Device interface */ 1755 DEVMETHOD(device_probe, vmbus_pcib_probe), 1756 DEVMETHOD(device_attach, vmbus_pcib_attach), 1757 DEVMETHOD(device_detach, vmbus_pcib_detach), 1758 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1759 DEVMETHOD(device_suspend, bus_generic_suspend), 1760 DEVMETHOD(device_resume, bus_generic_resume), 1761 1762 /* Bus interface */ 1763 DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar), 1764 DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar), 1765 DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource), 1766 DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource), 1767 DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), 1768 DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), 1769 DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), 1770 DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), 1771 #if __FreeBSD_version >= 1100000 1772 DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus), 1773 #endif 1774 1775 /* pcib interface */ 1776 DEVMETHOD(pcib_maxslots, pcib_maxslots), 1777 DEVMETHOD(pcib_read_config, vmbus_pcib_read_config), 1778 DEVMETHOD(pcib_write_config, vmbus_pcib_write_config), 1779 DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr), 1780 DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi), 1781 DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi), 1782 DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix), 1783 DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix), 1784 DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi), 1785 DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), 1786 1787 DEVMETHOD_END 1788 }; 1789 1790 static devclass_t pcib_devclass; 1791 1792 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods, 1793 sizeof(struct vmbus_pcib_softc)); 1794 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, pcib_devclass, 0, 0); 1795 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1); 1796 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1); 1797 1798 #endif /* NEW_PCIB */ 1799