1 /*- 2 * Copyright (c) 2016-2017 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef NEW_PCIB
#include "opt_acpi.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/mutex.h>
#include <sys/errno.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>

#if defined(__aarch64__)
#include <arm64/include/intr.h>
#endif
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/frame.h>
#include <machine/pci_cfgreg.h>
#include <machine/resource.h>

#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <dev/pci/pcib_private.h>
#include "pcib_if.h"
#if defined(__i386__) || defined(__amd64__)
#include <machine/intr_machdep.h>
#include <x86/apicreg.h>
#endif
#if defined(__aarch64__)
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/acpica/acpi_pcibvar.h>
#endif
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus_xact.h>
#include <dev/hyperv/vmbus/vmbus_reg.h>
#include <dev/hyperv/vmbus/vmbus_chanvar.h>

#include "vmbus_if.h"

/* Compatibility shims for pre-11 FreeBSD, which lacks rman_res_t. */
#if __FreeBSD_version < 1100000
typedef u_long rman_res_t;
#define RM_MAX_END (~(rman_res_t)0)
#endif

/*
 * A minimal Linux-style "completion" built from a mutex, a counter and
 * sleep/wakeup.  Used to wait for responses from the Hyper-V host.
 */
struct completion {
	unsigned int done;		/* number of outstanding complete()s */
	struct mtx lock;		/* protects 'done' */
};

/* Initialize a completion; pair with free_completion(). */
static void
init_completion(struct completion *c)
{
	memset(c, 0, sizeof(*c));
	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
	c->done = 0;
}

/* Destroy a completion's mutex; the completion must no longer be in use. */
static void
free_completion(struct completion *c)
{
	mtx_destroy(&c->lock);
}

/* Signal a completion and wake any thread sleeping in wait_for_completion*(). */
static void
complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done++;
	mtx_unlock(&c->lock);
	wakeup(c);
}

/* Sleep (uninterruptibly, no timeout) until the completion is signaled. */
static void
wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
	c->done--;
	mtx_unlock(&c->lock);
}

/*
 * Return: 0 if completed, a non-zero value if timed out.
 *
 * 'timeout' is in ticks, as passed to mtx_sleep().  Note this sleeps at
 * most once; a wakeup without a pending complete() is reported as timeout.
 */
static int
wait_for_completion_timeout(struct completion *c, int timeout)
{
	int ret;

	mtx_lock(&c->lock);

	if (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);

	if (c->done > 0) {
		c->done--;
		ret = 0;
	} else {
		ret = 1;
	}

	mtx_unlock(&c->lock);

	return (ret);
}

/* Encode a (major, minor) protocol version pair as major<<16 | minor. */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

enum {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
};

/*
 * The bus's MMIO window is 8KB; the second 4KB page is the window through
 * which a child's config space is accessed (see CFG_PAGE_OFFSET users).
 */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)

/*
 * Message Types
 */

enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE = 0x42490000,
	PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0,
	PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1,
	PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
	PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6,
	PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7,
	PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8,
	PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9,
	PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA,
	PCI_EJECT = PCI_MESSAGE_BASE + 0xB,
	PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC,
	PCI_REENABLE = PCI_MESSAGE_BASE + 0xD,
	PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE,
	PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF,
	PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2 = PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x17,
	PCI_DELETE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x18, /* unused */
	PCI_BUS_RELATIONS2 = PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3 = PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3 = PCI_MESSAGE_BASE + 0x1B,
	PCI_MESSAGE_MAXIMUM
};

/*
 * Structures defining the virtual PCI Express protocol.
 * All of these are on-the-wire layouts, hence __packed.
 */

union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;

/*
 * This representation is the one used in Windows, which is
 * what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 */
union win_slot_encoding {
	struct {
		uint32_t slot:5;	/* PCI slot (device) number */
		uint32_t func:3;	/* PCI function number */
		uint32_t reserved:24;
	} bits;
	uint32_t val;
} __packed;

/* Per-function description sent by the host in PCI_BUS_RELATIONS. */
struct pci_func_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
} __packed;

/* Extended function description used by PCI_BUS_RELATIONS2. */
struct pci_func_desc2 {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
	uint32_t flags;
	uint16_t virtual_numa_node;
	uint16_t reserved;
} __packed;


/* MSI description for PCI_CREATE_INTERRUPT_MESSAGE. */
struct hv_msi_desc {
	uint8_t vector;
	uint8_t delivery_mode;
	uint16_t vector_count;
	uint32_t reserved;
	uint64_t cpu_mask;
} __packed;

/* MSI description for PCI_CREATE_INTERRUPT_MESSAGE3 (wider vector/CPU list). */
struct hv_msi_desc3 {
	uint32_t vector;
	uint8_t delivery_mode;
	uint8_t reserved;
	uint16_t vector_count;
	uint16_t processor_count;
	uint16_t processor_array[32];
} __packed;

/* Translated interrupt (address/data pair) returned by the host. */
struct tran_int_desc {
	uint16_t reserved;
	uint16_t vector_count;
	uint32_t data;
	uint64_t address;
} __packed;

/* Common header of every protocol message. */
struct pci_message {
	uint32_t type;
} __packed;

/* A message addressed to one child function, identified by wslot. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

/* Layout of an inband message as received off the vmbus channel. */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

/* Layout of a completion packet from the host. */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;			/* negative values are failures */
} __packed;

/*
 * Local request context: its address is used as the vmbus transaction ID,
 * so completion packets can be routed back to completion_func().
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];	/* message body follows */
};

/*
 * Specific message types supporting the PCI protocol.
 */

/* Guest -> host version negotiation request. */
struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t is_last_attempt:1;
	uint32_t reservedz:31;
} __packed;

/* Guest -> host: the bus is entering D0; carries the config MMIO base. */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;
} __packed;

/* Host -> guest: the current set of child functions (v1 descriptors). */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

/* Host -> guest: the current set of child functions (v2 descriptors). */
struct pci_bus_relations2 {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc2 func[0];
} __packed;

#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)

/* Host response to PCI_QUERY_RESOURCE_REQUIREMENTS. */
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;			/* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

/* Guest -> host: PCI_RESOURCES_ASSIGNED body. */
struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS];	/* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

/* Guest -> host: PCI_RESOURCES_ASSIGNED2 body (not sent by this driver). */
struct pci_resources_assigned2 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][6];	/* not used here */
	uint32_t msi_descriptor_count;
	uint8_t reserved[70];
} __packed;

/* Guest -> host: create an MSI mapping for one function. */
struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

/* Same as above for PCI_CREATE_INTERRUPT_MESSAGE3. */
struct pci_create_interrupt3 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc3 int_desc;
} __packed;

/* Host response to interrupt creation: the translated interrupt. */
struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

/* Guest -> host: delete a previously created interrupt mapping. */
struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

/* Host -> guest message naming a single function (e.g. PCI_EJECT). */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

/* Guest -> host acknowledgement of an ejection (PCI_EJECTION_COMPLETE). */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;

/*
 * Driver specific state.
 */

enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-bus software state. */
struct hv_pcibus {
	device_t pcib;			/* this bridge device */
	device_t pci_bus;		/* the child pci bus device */
	struct vmbus_pcib_softc *sc;

	uint16_t pci_domain;

	enum hv_pcibus_state state;

	struct resource *cfg_res;	/* config MMIO window */

	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock;    /* Protect lists below */
	uint32_t protocol_version;
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	volatile int detaching;
};

/*
 * Superset of pci_func_desc with v2 fields; wire layout.
 * NOTE(review): appears unused in this chunk — confirm against the rest
 * of the file before removing.
 */
struct hv_pcidev_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
	uint32_t flags;
	uint16_t virtual_numa_node;
} __packed;


/* Per-child-function software state. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;	/* on hbus->children */

	struct pci_func_desc desc;	/* as reported by the host */

	bool reported_missing;		/* set/cleared during relation work */

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be both
 * processed in order.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;
};

/* One queued snapshot of the host's child-function list. */
struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;	/* on hbus->dr_list */
	uint32_t device_count;
	struct pci_func_desc func[0];
};

/* One host-created interrupt mapping owned by a child device. */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;	/* on hpdev->irq_desc_list */
	struct tran_int_desc desc;
	int irq;
};

/* Linux-style devfn packing: slot in bits 7..3, function in bits 2..0. */
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)

/* Convert a packed devfn into the Windows slot encoding. */
static uint32_t
devfn_to_wslot(unsigned int devfn)
{
	union win_slot_encoding wslot;

	wslot.val = 0;
	wslot.bits.slot = PCI_SLOT(devfn);
	wslot.bits.func = PCI_FUNC(devfn);

	return (wslot.val);
}

/* Inverse of devfn_to_wslot(). */
static unsigned int
wslot_to_devfn(uint32_t wslot)
{
	union win_slot_encoding encoding;
	unsigned int slot;
	unsigned int func;

	encoding.val = wslot;

	slot = encoding.bits.slot;
	func = encoding.bits.func;

	return (PCI_DEVFN(slot, func));
}

/* Softc of the vmbus child device this driver attaches to. */
struct vmbus_pcib_softc {
	struct vmbus_channel *chan;
	void *rx_buf;			/* default receive buffer */

	struct taskqueue *taskq;

	struct hv_pcibus *hbus;
};

/* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};

/* Generic request context: completion plus the host's status. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Context for a resource-requirements query: which child to fill in. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};

/* Context for interrupt creation: status plus the returned descriptor. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};

/*
 * It is possible the device is revoked during initialization.
 * Check if this happens during wait.
 * Return: 0 if response arrived, ENODEV if device revoked.
 */
static int
wait_for_response(struct hv_pcibus *hbus, struct completion *c)
{
	/* Poll for revocation every hz/10 ticks while waiting. */
	do {
		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
			device_printf(hbus->pcib,
			    "The device is revoked.\n");
			return (ENODEV);
		}
	} while (wait_for_completion_timeout(c, hz /10) != 0);

	return 0;
}

/*
 * Generic completion callback: record the host's status (or -1 if the
 * response is too short) and wake the waiter.
 */
static void
hv_pci_generic_compl(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct hv_pci_compl *comp_pkt = context;

	if (resp_packet_size >= sizeof(struct pci_response))
		comp_pkt->completion_status = resp->status;
	else
		comp_pkt->completion_status = -1;

	complete(&comp_pkt->host_event);
}

/*
 * Completion callback for PCI_QUERY_RESOURCE_REQUIREMENTS: copy the
 * probed BAR values into the child device, then wake the waiter.
 */
static void
q_resource_requirements(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct q_res_req_compl *completion = context;
	struct pci_q_res_req_response *q_res_req =
	    (struct pci_q_res_req_response *)resp;
	int i;

	if (resp->status < 0) {
		printf("vmbus_pcib: failed to query resource requirements\n");
	} else {
		for (i = 0; i < MAX_NUM_BARS; i++)
			completion->hpdev->probed_bar[i] =
			    q_res_req->probed_bar[i];
	}

	complete(&completion->host_event);
}

/*
 * Completion callback for interrupt creation: stash the status and the
 * translated interrupt descriptor returned by the host.
 */
static void
hv_pci_compose_compl(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct compose_comp_ctxt *comp_pkt = context;
	struct pci_create_int_response *int_resp =
	    (struct pci_create_int_response *)resp;

	comp_pkt->comp_pkt.completion_status = resp->status;
	comp_pkt->int_desc = int_resp->int_desc;
	complete(&comp_pkt->comp_pkt.host_event);
}

/*
 * Tell the host to delete one interrupt mapping, then free the local
 * descriptor.  Fire-and-forget: the send result is not checked.
 */
static void
hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
{
	struct pci_delete_interrupt *int_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
	} ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
	int_pkt->wslot.val = hpdev->desc.wslot.val;
	int_pkt->int_desc = hid->desc;

	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    int_pkt, sizeof(*int_pkt), 0);

	free(hid, M_DEVBUF);
}

/*
 * Tear down one child: delete its device_t (if attached), unlink it from
 * the bus, release its interrupt mappings, and free its memory.
 */
static void
hv_pci_delete_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct hv_irq_desc *hid, *tmp_hid;
	device_t pci_dev;
	int devfn;

	devfn = wslot_to_devfn(hpdev->desc.wslot.val);

	bus_topo_lock();

	/* NOTE(review): bus number is hard-wired to 0 here. */
	pci_dev = pci_find_dbsf(hbus->pci_domain,
	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
	if (pci_dev)
		device_delete_child(hbus->pci_bus, pci_dev);

	bus_topo_unlock();

	mtx_lock(&hbus->device_list_lock);
	TAILQ_REMOVE(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);

	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
		hv_int_desc_free(hpdev, hid);

	free(hpdev, M_DEVBUF);
}

/*
 * Allocate a child for 'desc', query its resource requirements from the
 * host, and put it on the bus's children list.  Returns NULL on failure.
 */
static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus *hbus, struct pci_func_desc *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_child_message)];
	} ctxt;
	int ret;

	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
	hpdev->hbus = hbus;

	TAILQ_INIT(&hpdev->irq_desc_list);

	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;

	ctxt.pkt.compl_ctxt = &comp_pkt;
	ctxt.pkt.completion_func = q_resource_requirements;

	res_req = (struct pci_child_message *)&ctxt.pkt.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.val = desc->wslot.val;

	/* &ctxt.pkt doubles as the transaction ID for the completion. */
	ret = vmbus_chan_send(hbus->sc->chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto err;

	if (wait_for_response(hbus, &comp_pkt.host_event))
		goto err;

	free_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;

	mtx_lock(&hbus->device_list_lock);
	/* The first child's serial number fixes the PCI domain. */
	if (TAILQ_EMPTY(&hbus->children))
		hbus->pci_domain = desc->ser & 0xFFFF;
	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);
	return (hpdev);
err:
	free_completion(&comp_pkt.host_event);
	free(hpdev, M_DEVBUF);
	return (NULL);
}

#if __FreeBSD_version < 1100000

/* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */

/* Read one function's config header and add a pci child for it. */
static struct pci_devinfo *
pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
    int slot, int func, size_t dinfo_size)
{
	struct pci_devinfo *dinfo;

	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
	if (dinfo != NULL)
		pci_add_child(dev, dinfo);

	return (dinfo);
}

/*
 * Rescan the bus: add device_t's for newly present functions, delete the
 * ones that disappeared, then probe/attach the newcomers.
 */
static int
pci_rescan(device_t dev)
{
#define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_softc *sc;
	device_t child, *devlist, *unchanged;
	int devcount, error, i, j, maxslots, oldcount;
	int busno, domain, s, f, pcifunchigh;
	uint8_t hdrtype;

	/* No need to check for ARI on a rescan. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error)
		return (error);
	if (devcount != 0) {
		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
		    M_NOWAIT | M_ZERO);
		if (unchanged == NULL) {
			free(devlist, M_TEMP);
			return (ENOMEM);
		}
	} else
		unchanged = NULL;

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* If function 0 is not present, skip to the next slot. */
		f = 0;
		if (REG(PCIR_VENDOR, 2) == 0xffff)
			continue;
		pcifunchigh = 0;
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = 0; f <= pcifunchigh; f++) {
			if (REG(PCIR_VENDOR, 2) == 0xffff)
				continue;

			/*
			 * Found a valid function.  Check if a
			 * device_t for this device already exists.
			 */
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			pci_identify_function(pcib, dev, domain, busno, s, f,
			    sizeof(struct pci_devinfo));
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}

#else

/* Modern FreeBSD: rescanning is a bus method. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}

#endif

/*
 * Taskqueue handler for a queued "bus relations" snapshot: reconcile our
 * child list against the host's view, adding new children, removing the
 * ones no longer reported, and rescanning the bus if anything was added.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct pci_func_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/* Remove missing device(s), if any */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}

/* Look up a child by its Windows slot encoding; NULL if not found. */
static struct hv_pci_dev *
get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
{
	struct hv_pci_dev *hpdev, *ret = NULL;

	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link) {
		if (hpdev->desc.wslot.val == wslot) {
			ret = hpdev;
			break;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	return (ret);
}

/*
 * Copy a PCI_BUS_RELATIONS message into an hv_dr_state snapshot, queue it
 * on dr_list, and schedule pci_devices_present_work() to process it.
 */
static void
hv_pci_devices_present(struct hv_pcibus *hbus,
    struct pci_bus_relations *relations)
{
	struct hv_dr_state *dr;
	struct hv_dr_work *dr_wrk;
	unsigned long dr_size;

	if (hbus->detaching && relations->device_count > 0)
		return;

	dr_size = offsetof(struct hv_dr_state, func) +
	    (sizeof(struct pci_func_desc) * relations->device_count);
	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);

	dr->device_count = relations->device_count;
	if (dr->device_count != 0)
		memcpy(dr->func, relations->func,
		    sizeof(struct pci_func_desc) * dr->device_count);

	mtx_lock(&hbus->device_list_lock);
	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
	mtx_unlock(&hbus->device_list_lock);

	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
	dr_wrk->bus = hbus;
	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
}

/*
 * Taskqueue handler for an ejection: delete the child, then acknowledge
 * the ejection to the host.  The wslot is saved before hpdev is freed.
 */
static void
hv_eject_device_work(void *arg, int pending __unused)
{
	struct hv_pci_dev *hpdev = arg;
	union win_slot_encoding wslot = hpdev->desc.wslot;
	struct hv_pcibus *hbus = hpdev->hbus;
	struct pci_eject_response *eject_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hv_pci_delete_device(hpdev);

	memset(&ctxt, 0, sizeof(ctxt));
	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	eject_pkt->wslot.val = wslot.val;
	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    eject_pkt, sizeof(*eject_pkt), 0);
}

/* Schedule ejection of a child on the channel's management taskqueue. */
static void
hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct taskqueue *taskq;

	if (hbus->detaching)
		return;

	/*
	 * Push this task into the same taskqueue on which
	 * vmbus_pcib_attach() runs, so we're sure this task can't run
	 * concurrently with vmbus_pcib_attach().
	 */
	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
	taskqueue_enqueue(taskq, &hpdev->eject_task);
}

#define PCIB_PACKET_SIZE	0x100

/*
 * Channel callback: drain incoming vmbus packets, routing completion
 * packets to the waiting request's completion_func and dispatching
 * inband messages (bus relations, ejection requests).
 */
static void
vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
{
	struct vmbus_pcib_softc *sc = arg;
	struct hv_pcibus *hbus = sc->hbus;

	void *buffer;
	int bufferlen = PCIB_PACKET_SIZE;

	struct pci_packet *comp_packet;
	struct pci_response *response;
	struct pci_incoming_message *new_msg;
	struct pci_bus_relations *bus_rel;
	struct pci_dev_incoming *dev_msg;
	struct hv_pci_dev *hpdev;

	buffer = sc->rx_buf;
	do {
		struct vmbus_chanpkt_hdr *pkt = buffer;
		uint32_t bytes_rxed;
		int ret;

		bytes_rxed = bufferlen;
		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);

		if (ret == ENOBUFS) {
			/* Handle large packet */
			if (bufferlen > PCIB_PACKET_SIZE) {
				free(buffer, M_DEVBUF);
				buffer = NULL;
			}

			/* alloc new buffer */
			buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
			bufferlen = bytes_rxed;

			continue;
		}

		if (ret != 0) {
			/* ignore EIO or EAGAIN */
			break;
		}

		if (bytes_rxed <= sizeof(struct pci_response))
			continue;

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			/* The transaction ID is the requester's pci_packet. */
			comp_packet =
			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
			response = (struct pci_response *)pkt;
			comp_packet->completion_func(comp_packet->compl_ctxt,
			    response, bytes_rxed);
			break;
		case VMBUS_CHANPKT_TYPE_INBAND:
			new_msg = (struct pci_incoming_message *)buffer;

			switch (new_msg->message_type.type) {
			case PCI_BUS_RELATIONS:
				bus_rel = (struct pci_bus_relations *)buffer;

				if (bus_rel->device_count == 0)
					break;

				/* Ignore truncated relation messages. */
				if (bytes_rxed <
				    offsetof(struct pci_bus_relations, func) +
				    (sizeof(struct pci_func_desc) *
				    (bus_rel->device_count)))
					break;

				hv_pci_devices_present(hbus, bus_rel);
				break;

			case PCI_EJECT:
				dev_msg = (struct pci_dev_incoming *)buffer;
				hpdev = get_pcichild_wslot(hbus,
				    dev_msg->wslot.val);

				if (hpdev)
					hv_pci_eject_device(hpdev);

				break;
			default:
				printf("vmbus_pcib: Unknown msg type 0x%x\n",
				    new_msg->message_type.type);
				break;
			}
			break;
		default:
			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
			    pkt->cph_type);
			break;
		}
	} while (1);

	/* Free an enlarged buffer; sc->rx_buf is owned by the softc. */
	if (bufferlen > PCIB_PACKET_SIZE)
		free(buffer, M_DEVBUF);
}

/*
 * Negotiate the protocol version with the host.  Returns 0 on success,
 * EPROTO if the host rejects our (only) offered version, or another
 * errno if the exchange itself fails.
 */
static int
hv_pci_protocol_negotiation(struct hv_pcibus *hbus)
{
	struct pci_version_request *version_req;
	struct hv_pci_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_version_request)];
	} ctxt;
	int ret;

	init_completion(&comp_pkt.host_event);

	ctxt.pkt.completion_func = hv_pci_generic_compl;
	ctxt.pkt.compl_ctxt = &comp_pkt;
	version_req = (struct pci_version_request *)&ctxt.pkt.message;
	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
	version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;
	version_req->is_last_attempt = 1;

	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req),
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (!ret)
		ret = wait_for_response(hbus, &comp_pkt.host_event);

	if (ret) {
		device_printf(hbus->pcib,
		    "vmbus_pcib failed to request version: %d\n",
		    ret);
		goto out;
	}

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib,
		    "vmbus_pcib version negotiation failed: %x\n",
		    comp_pkt.completion_status);
		ret = EPROTO;
	} else {
		ret = 0;
	}
out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/* Ask the host to send along the list of child devices */
static int
hv_pci_query_relations(struct hv_pcibus *hbus)
{
	struct pci_message message;
	int ret;

	message.type = PCI_QUERY_BUS_RELATIONS;
	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    &message, sizeof(message), 0);
	return (ret);
}

/*
 * Move the bus into the D0 (powered-on) state.  Returns 0 on success,
 * EPROTO if the host reports failure, or another errno on send/wait error.
 */
static int
hv_pci_enter_d0(struct hv_pcibus *hbus)
{
	struct pci_bus_d0_entry *d0_entry;
	struct hv_pci_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
	} ctxt;
	int ret;

	/*
	 * Tell the host that the bus is ready to use, and moved into the
	 * powered-on state. This includes telling the host which region
	 * of memory-mapped I/O space has been chosen for configuration space
	 * access.
	 */
	init_completion(&comp_pkt.host_event);

	ctxt.pkt.completion_func = hv_pci_generic_compl;
	ctxt.pkt.compl_ctxt = &comp_pkt;

	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
	memset(d0_entry, 0, sizeof(*d0_entry));
	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);

	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (!ret)
		ret = wait_for_response(hbus, &comp_pkt.host_event);

	if (ret)
		goto out;

	if (comp_pkt.completion_status < 0) {
		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
		ret = EPROTO;
	} else {
		ret = 0;
	}

out:
	free_completion(&comp_pkt.host_event);
	return (ret);
}

/*
 * It looks this is only needed by Windows VM, but let's send the message too
 * just to make the host happy.
 */
/*
 * Send one PCI_RESOURCES_ASSIGNED message per present child device and
 * wait for the host's completion for each before moving on.
 */
static int
hv_send_resources_allocated(struct hv_pcibus *hbus)
{
	struct pci_resources_assigned *res_assigned;
	struct hv_pci_compl comp_pkt;
	struct hv_pci_dev *hpdev;
	struct pci_packet *pkt;
	uint32_t wslot;
	int ret = 0;

	/* One reusable buffer: packet header plus the assigned-resources body. */
	pkt = malloc(sizeof(*pkt) + sizeof(*res_assigned),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* Scan the whole 8-bit wslot space for children. */
	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		init_completion(&comp_pkt.host_event);

		memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
		pkt->completion_func = hv_pci_generic_compl;
		pkt->compl_ctxt = &comp_pkt;

		res_assigned = (struct pci_resources_assigned *)&pkt->message;
		res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
		res_assigned->wslot.val = hpdev->desc.wslot.val;

		/* RC flag: the host sends a completion, handled via comp_pkt. */
		ret = vmbus_chan_send(hbus->sc->chan,
		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
		    &pkt->message, sizeof(*res_assigned),
		    (uint64_t)(uintptr_t)pkt);
		if (!ret)
			ret = wait_for_response(hbus, &comp_pkt.host_event);

		/* The completion must be torn down before any early exit. */
		free_completion(&comp_pkt.host_event);

		if (ret)
			break;

		if (comp_pkt.completion_status < 0) {
			ret = EPROTO;
			device_printf(hbus->pcib,
			    "failed to send PCI_RESOURCES_ASSIGNED\n");
			break;
		}
	}

	free(pkt, M_DEVBUF);
	return (ret);
}

/*
 * Tell the host that each present child's resources have been released.
 * These messages are fire-and-forget: no completion is requested.
 */
static int
hv_send_resources_released(struct hv_pcibus *hbus)
{
	struct pci_child_message pkt;
	struct hv_pci_dev *hpdev;
	uint32_t wslot;
	int ret;

	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		pkt.message_type.type = PCI_RESOURCES_RELEASED;
		pkt.wslot.val = hpdev->desc.wslot.val;

		ret = vmbus_chan_send(hbus->sc->chan,
		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
		if (ret)
			return (ret);
	}

	return (0);
}

/*
 * Generate typed accessors (hv_cfg_read_1/2/4, hv_cfg_write_1/2/4) over
 * the bridge's config-space MMIO window (hbus->cfg_res).
 * "x" is the value width in bits, "s" the bus-space access size in bytes.
 */
#define hv_cfg_read(x, s)						\
static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
    bus_size_t offset)							\
{									\
	return (bus_read_##s(bus->cfg_res, offset));			\
}

/*
 * NOTE(review): "return (void-expression);" in a void function is a
 * constraint violation under strict ISO C, though accepted by the
 * compilers this file targets — confirm before enabling -pedantic.
 */
#define hv_cfg_write(x, s)						\
static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
    bus_size_t offset, uint##x##_t val)					\
{									\
	return (bus_write_##s(bus->cfg_res, offset, val));		\
}

hv_cfg_read(8, 1)
hv_cfg_read(16, 2)
hv_cfg_read(32, 4)

hv_cfg_write(8, 1)
hv_cfg_write(16, 2)
hv_cfg_write(32, 4)

/*
 * Core config-space read for a child device.  Header fields the host does
 * not virtualize (IDs, class/revision, subsystem IDs, ROM BAR, INTLINE/
 * INTPIN) are emulated from the cached device descriptor; everything else
 * is forwarded through the shared config MMIO window under config_lock.
 */
static void
_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t *val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	/* Offset of the selected function's config page within cfg_res. */
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
	 */
	if (where + size <= PCIR_COMMAND) {
		/* Vendor/device ID: serve from the cached descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCIR_REVID && where + size <=
		   PCIR_CACHELNSZ) {
		/* Revision/class code: also cached in the descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
		       PCIR_REVID, size);
	} else if (where >= PCIR_SUBVEND_0 && where + size <=
		   PCIR_BIOS) {
		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
		       PCIR_SUBVEND_0, size);
	} else if (where >= PCIR_BIOS && where + size <=
		   PCIR_CAP_PTR) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if ((where >= PCIR_INTLINE && where + size <=
		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 * NOTE(review): a 2-byte read at PCIR_INTLINE does not match
		 * either arm of this condition and falls through to the MMIO
		 * path below — confirm whether that is intentional.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		/*
		 * Pass-through path: the window is shared by all functions,
		 * so selection + access must be atomic under config_lock.
		 */
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be read.
 */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start reading.*/
		mb();

		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
			break;
		case 2:
			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
			break;
		default:
			/* Any other size is served as a 4-byte read. */
			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config read: it's unlikely to reach here. */
		memset(val, 0, size);
	}
}

/*
 * Core config-space write for a child device.  Read-only emulated ranges
 * (subsystem IDs, ROM BAR) are silently dropped; writes at or beyond
 * PCIR_COMMAND go through the shared config MMIO window under config_lock.
 */
static void
_hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/* SSIDs and ROM BARs are read-only */
	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
		return;

	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be written. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start writing.*/
		wmb();

		/* Write to that function's config space. */
		switch (size) {
		case 1:
			hv_cfg_write_1(hbus, addr, (uint8_t)val);
			break;
		case 2:
			hv_cfg_write_2(hbus, addr, (uint16_t)val);
			break;
		default:
			/* Any other size is issued as a 4-byte write. */
			hv_cfg_write_4(hbus, addr, (uint32_t)val);
			break;
		}

		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config write: it's unlikely to reach here. */
		return;
	}
}

/*
 * The vPCI in some Hyper-V releases does not initialize the low 4 bits
 * of the BAR registers.  This can cause odd problems that make the PCI
 * code fail to configure the BARs correctly.
 *
 * Just write all 1's to those BARs whose probed values are not zero.
 * This seems to make the Hyper-V vPCI and pci_write_bar() cooperate
 * correctly.
 */

static void
vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
{
	struct hv_pci_dev *hpdev;
	int i;

	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link) {
		/* Only the six type-0 header BARs are considered. */
		for (i = 0; i < 6; i++) {
			/* Ignore empty bar */
			if (hpdev->probed_bar[i] == 0)
				continue;

			uint32_t bar_val = 0;

			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
			    4, &bar_val);

			/*
			 * Mismatch between the probed value and what the
			 * host returns now: poke the BAR with all 1s, then
			 * restore, to force the host to latch the size bits.
			 */
			if (hpdev->probed_bar[i] != bar_val) {
				if (bootverbose)
					printf("vmbus_pcib: initialize bar %d "
					    "by writing all 1s\n", i);

				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
				    4, 0xffffffff);

				/* Now write the original value back */
				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
				    4, bar_val);
			}
		}
	}
	mtx_unlock(&hbus->device_list_lock);
}

/*
 * Task run in the channel's task context: mark the bus as detaching so
 * the channel callback stops queueing new work.
 */
static void
vmbus_pcib_set_detaching(void *arg, int pending __unused)
{
	struct hv_pcibus *hbus = arg;

	atomic_set_int(&hbus->detaching, 1);
}

/*
 * Quiesce the bus before detach: set the detaching flag from within the
 * channel's own task context, then drain any already-queued work.
 */
static void
vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
{
	struct task task;

	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);

	/*
	 * Make sure the channel callback won't push any possible new
	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
	 */
	vmbus_chan_run_task(hbus->sc->chan, &task);

	taskqueue_drain_all(hbus->sc->taskq);
}


/*
 * Standard probe entry point.
 *
 */
static int
vmbus_pcib_probe(device_t dev)
{
	/* Match on the Hyper-V PCI pass-through device type GUID. */
	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_pass_through_dev_type) == 0) {
		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
		return (BUS_PROBE_DEFAULT);
	}
	return (ENXIO);
}

/*
 * Standard attach entry point.
 *
 */
static int
vmbus_pcib_attach(device_t dev)
{
	const int pci_ring_size = (4 * PAGE_SIZE);
	const struct hyperv_guid *inst_guid;
	struct vmbus_channel *channel;
	struct vmbus_pcib_softc *sc;
	struct hv_pcibus *hbus;
	int rid = 0;
	int ret;

	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
	hbus->pcib = dev;

	/*
	 * Derive a stable PCI domain number from bytes 8 and 9 of the
	 * VMBus channel instance GUID.
	 */
	channel = vmbus_get_channel(dev);
	inst_guid = vmbus_chan_guid_inst(channel);
	hbus->pci_domain = inst_guid->hv_guid[9] |
	    (inst_guid->hv_guid[8] << 8);

	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
	TAILQ_INIT(&hbus->children);
	TAILQ_INIT(&hbus->dr_list);

	/* Page-aligned MMIO window used for config-space pass-through. */
	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));

	if (!hbus->cfg_res) {
		device_printf(dev, "failed to get resource for cfg window\n");
		ret = ENXIO;
		goto free_bus;
	}

	sc = device_get_softc(dev);
	sc->chan = channel;
	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
	sc->hbus = hbus;

	/*
	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
	 * messages. NB: we can't handle the messages in the channel callback
	 * directly, because the message handlers need to send new messages
	 * to the host and wait for the host's completion messages, which
	 * must also be handled by the channel callback.
	 */
	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->taskq);
	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");

	hbus->sc = sc;

	init_completion(&hbus->query_completion);
	hbus->query_comp = &hbus->query_completion;

	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
		NULL, 0, vmbus_pcib_on_channel_callback, sc);
	if (ret)
		goto free_res;

	/* Negotiate protocol version with the host. */
	ret = hv_pci_protocol_negotiation(hbus);
	if (ret)
		goto vmbus_close;

	/*
	 * Ask for the child-device list; the reply arrives asynchronously
	 * via the channel callback, which signals query_comp when done.
	 */
	ret = hv_pci_query_relations(hbus);
	if (!ret)
		ret = wait_for_response(hbus, hbus->query_comp);

	if (ret)
		goto vmbus_close;

	/* Move the virtual bus into the powered-on (D0) state. */
	ret = hv_pci_enter_d0(hbus);
	if (ret)
		goto vmbus_close;

	ret = hv_send_resources_allocated(hbus);
	if (ret)
		goto vmbus_close;

	/* Work around host-side BAR initialization quirks. */
	vmbus_pcib_prepopulate_bars(hbus);

	hbus->pci_bus = device_add_child(dev, "pci", -1);
	if (!hbus->pci_bus) {
		device_printf(dev, "failed to create pci bus\n");
		ret = ENXIO;
		goto vmbus_close;
	}

	bus_generic_attach(dev);

	hbus->state = hv_pcibus_installed;

	return (0);

	/* Error unwind: release resources in reverse order of acquisition. */
vmbus_close:
	vmbus_pcib_pre_detach(hbus);
	vmbus_chan_close(sc->chan);
free_res:
	taskqueue_free(sc->taskq);
	free_completion(&hbus->query_completion);
	free(sc->rx_buf, M_DEVBUF);
	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
free_bus:
	mtx_destroy(&hbus->device_list_lock);
	mtx_destroy(&hbus->config_lock);
	free(hbus, M_DEVBUF);
	return (ret);
}

/*
 * Standard detach entry point
 */
static int
vmbus_pcib_detach(device_t dev)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct hv_pcibus *hbus = sc->hbus;
	struct pci_message teardown_packet;
	struct pci_bus_relations relations;
	int ret;

	/* Stop the channel callback from queueing new work, then drain. */
	vmbus_pcib_pre_detach(hbus);

	if (hbus->state == hv_pcibus_installed)
		bus_generic_detach(dev);

	/* Delete any children which might still exist. */
	memset(&relations, 0, sizeof(relations));
	hv_pci_devices_present(hbus, &relations);

	ret = hv_send_resources_released(hbus);
	if (ret)
		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");

	/* Move the virtual bus out of D0; failure is logged but non-fatal. */
	teardown_packet.type = PCI_BUS_D0EXIT;
	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    &teardown_packet, sizeof(struct pci_message), 0);
	if (ret)
		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");

	/* Drain once more before closing the channel and freeing state. */
	taskqueue_drain_all(hbus->sc->taskq);
	vmbus_chan_close(sc->chan);
	taskqueue_free(sc->taskq);

	free_completion(&hbus->query_completion);
	free(sc->rx_buf, M_DEVBUF);
	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);

	mtx_destroy(&hbus->device_list_lock);
	mtx_destroy(&hbus->config_lock);
	free(hbus, M_DEVBUF);

	return (0);
}

/*
 * Bus ivar accessor: expose the PCI domain and the (single) bus number.
 */
static int
vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);

	switch (which) {
	case PCIB_IVAR_DOMAIN:
		*val = sc->hbus->pci_domain;
		return (0);

	case PCIB_IVAR_BUS:
		/* There is only bus 0. */
		*val = 0;
		return (0);
	}
	return (ENOENT);
}

/* No writable ivars are supported. */
static int
vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
{
	return (ENOENT);
}

/*
 * Resource allocation for children.  Port I/O is refused (the virtual
 * bus has no I/O ports); memory BARs are clamped to 32-bit addresses
 * when the probed BAR is not a 64-bit one.
 */
static struct resource *
vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	unsigned int bar_no;
	struct hv_pci_dev *hpdev;
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct resource *res;
	unsigned int devfn;

	if (type == PCI_RES_BUS)
		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
		    start, end, count, flags));

	/* Devices with port I/O BAR are not supported. */
	if (type == SYS_RES_IOPORT)
		return (NULL);

	if (type == SYS_RES_MEMORY) {
		devfn = PCI_DEVFN(pci_get_slot(child),
		    pci_get_function(child));
		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
		if (!hpdev)
			return (NULL);

		bar_no = PCI_RID2BAR(*rid);
		if (bar_no >= MAX_NUM_BARS)
			return (NULL);

		/* Make sure a 32-bit BAR gets a 32-bit address */
		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
			end = ulmin(end, 0xFFFFFFFF);
	}

	res = bus_generic_alloc_resource(dev, child, type, rid,
	    start, end, count, flags);
	/*
	 * If this is a request for a specific range, assume it is
	 * correct and pass it up to the parent.
	 * NOTE(review): the retry repeats the identical call with the same
	 * arguments — confirm whether a different allocator was intended.
	 */
	if (res == NULL && start + count - 1 == end)
		res = bus_generic_alloc_resource(dev, child, type, rid,
		    start, end, count, flags);
	return (res);
}

/* Release a child's resource; mirrors the refusal of port I/O above. */
static int
vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);

	if (type == PCI_RES_BUS)
		return (pci_domain_release_bus(sc->hbus->pci_domain, child,
		    rid, r));

	if (type == SYS_RES_IOPORT)
		return (EINVAL);

	return (bus_generic_release_resource(dev, child, type, rid, r));
}

#if __FreeBSD_version >= 1100000
/* Delegate CPU-set queries to our parent bus. */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	return (bus_get_cpus(pcib, op, setsize, cpuset));
}
#endif

/*
 * pcib config-space read: translate (slot, func) to a wslot and forward
 * to the front-end.  Returns all-ones when the child does not exist,
 * matching absent-device behavior on real hardware.
 */
static uint32_t
vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
    u_int reg, int bytes)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct hv_pci_dev *hpdev;
	unsigned int devfn = PCI_DEVFN(slot, func);
	uint32_t data = 0;

	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));

	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return (~0);

	_hv_pcifront_read_config(hpdev, reg, bytes, &data);

	return (data);
}

/* pcib config-space write; silently dropped if the child does not exist. */
static void
vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
    u_int reg, uint32_t data, int bytes)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct hv_pci_dev *hpdev;
	unsigned int devfn = PCI_DEVFN(slot, func);

	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));

	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return;

	_hv_pcifront_write_config(hpdev, reg, bytes, data);
}

static int
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
{
	/* We only support MSI/MSI-X and don't support INTx interrupt. */
	return (PCI_INVALID_IRQ);
}

/* MSI allocation is delegated to the parent (vmbus) bus. */
static int
vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
    int maxcount, int *irqs)
{
	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
	    irqs));
}

static int
vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{
	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
}

static int
vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
{
	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
}

static int
vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
{
	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
}

/* x86 MSI address/data field layout (IOART_* come from apicreg.h). */
#define MSI_INTEL_ADDR_DEST		0x000ff000
#define MSI_INTEL_DATA_INTVEC		IOART_INTVEC	/* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED		IOART_DELFIXED

/*
 * Map an MSI for a child: get the parent's MSI address/data, extract the
 * target CPU and vector (assumes the x86 MSI layout above — verify on
 * other architectures), ask the host to create the interrupt, and return
 * the host-composed address/data pair to the caller.
 */
static int
vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
    uint64_t *addr, uint32_t *data)
{
	unsigned int devfn;
	struct hv_pci_dev *hpdev;

	uint64_t v_addr;
	uint32_t v_data;
	struct hv_irq_desc *hid, *tmp_hid;
	unsigned int cpu, vcpu_id;
	unsigned int vector;

	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
	struct pci_create_interrupt *int_pkt;
	struct compose_comp_ctxt comp;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_create_interrupt)];
	} ctxt;

	int ret;

	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return (ENOENT);

	/* Let the parent compose the native MSI address/data first. */
	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
	    &v_addr, &v_data);
	if (ret)
		return (ret);

	/* Drop any stale descriptor for this irq (e.g. on re-map). */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
		if (hid->irq == irq) {
			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
			hv_int_desc_free(hpdev, hid);
			break;
		}
	}

	/* Destination APIC ID lives in address bits 12-19 (x86 layout). */
	cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
	vector = v_data & MSI_INTEL_DATA_INTVEC;

	init_completion(&comp.comp_pkt.host_event);

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.pkt.completion_func = hv_pci_compose_compl;
	ctxt.pkt.compl_ctxt = &comp;

	int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
	int_pkt->wslot.val = hpdev->desc.wslot.val;
	int_pkt->int_desc.vector = vector;
	int_pkt->int_desc.vector_count = 1;
	int_pkt->int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED;
	int_pkt->int_desc.cpu_mask = 1ULL << vcpu_id;

	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, int_pkt, sizeof(*int_pkt),
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret) {
		free_completion(&comp.comp_pkt.host_event);
		return (ret);
	}

	/* Block until the host composes the interrupt descriptor. */
	wait_for_completion(&comp.comp_pkt.host_event);
	free_completion(&comp.comp_pkt.host_event);

	if (comp.comp_pkt.completion_status < 0)
		return (EPROTO);

	/* Hand the host-provided address/data back to the MSI machinery. */
	*addr = comp.int_desc.address;
	*data = comp.int_desc.data;

	/* Remember the descriptor so it can be freed on unmap/eject. */
	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
	hid->irq = irq;
	hid->desc = comp.int_desc;
	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);

	return (0);
}

static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		vmbus_pcib_probe),
	DEVMETHOD(device_attach,	vmbus_pcib_attach),
	DEVMETHOD(device_detach,	vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,	vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,	vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,	vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_release_resource,	vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
	DEVMETHOD(bus_get_cpus,		vmbus_pcib_get_cpus),
#endif

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,	pcib_maxslots),
	DEVMETHOD(pcib_read_config,	vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,	vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,	vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,	vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,	vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,	vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,	vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,		vmbus_pcib_map_msi),
	DEVMETHOD(pcib_request_feature,	pcib_request_feature_allow),

	DEVMETHOD_END
};

DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
    sizeof(struct vmbus_pcib_softc));
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);

#endif /* NEW_PCIB */