/*- * Copyright (c) 2016-2017 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__aarch64__) #include #endif #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #endif #if defined(__aarch64__) #include #include #include #include #endif #include #include #include #include #include "vmbus_if.h" struct completion { unsigned int done; struct mtx lock; }; static void init_completion(struct completion *c) { memset(c, 0, sizeof(*c)); mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF); c->done = 0; } static void reinit_completion(struct completion *c) { c->done = 0; } static void free_completion(struct completion *c) { mtx_destroy(&c->lock); } static void complete(struct completion *c) { mtx_lock(&c->lock); c->done++; mtx_unlock(&c->lock); wakeup(c); } static void wait_for_completion(struct completion *c) { mtx_lock(&c->lock); while (c->done == 0) mtx_sleep(c, &c->lock, 0, "hvwfc", 0); c->done--; mtx_unlock(&c->lock); } /* * Return: 0 if completed, a non-zero value if timed out. */ static int wait_for_completion_timeout(struct completion *c, int timeout) { int ret; mtx_lock(&c->lock); if (c->done == 0) mtx_sleep(c, &c->lock, 0, "hvwfc", timeout); if (c->done > 0) { c->done--; ret = 0; } else { ret = 1; } mtx_unlock(&c->lock); return (ret); } #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor))) enum pci_protocol_version_t { PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4), }; static enum pci_protocol_version_t pci_protocol_versions[] = { PCI_PROTOCOL_VERSION_1_4, PCI_PROTOCOL_VERSION_1_1, }; #define PCI_CONFIG_MMIO_LENGTH 0x2000 #define CFG_PAGE_OFFSET 0x1000 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET) /* * Message Types */ enum pci_message_type { /* * Version 1.1 */ PCI_MESSAGE_BASE = 0x42490000, PCI_BUS_RELATIONS = PCI_MESSAGE_BASE + 0, PCI_QUERY_BUS_RELATIONS = PCI_MESSAGE_BASE + 1, PCI_POWER_STATE_CHANGE = PCI_MESSAGE_BASE + 4, PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5, PCI_QUERY_RESOURCE_RESOURCES = PCI_MESSAGE_BASE + 6, PCI_BUS_D0ENTRY = PCI_MESSAGE_BASE + 7, PCI_BUS_D0EXIT = PCI_MESSAGE_BASE + 8, PCI_READ_BLOCK = PCI_MESSAGE_BASE + 9, PCI_WRITE_BLOCK = PCI_MESSAGE_BASE + 0xA, PCI_EJECT = PCI_MESSAGE_BASE + 0xB, PCI_QUERY_STOP = PCI_MESSAGE_BASE + 0xC, PCI_REENABLE = PCI_MESSAGE_BASE + 0xD, PCI_QUERY_STOP_FAILED = PCI_MESSAGE_BASE + 0xE, PCI_EJECTION_COMPLETE = PCI_MESSAGE_BASE + 0xF, PCI_RESOURCES_ASSIGNED = PCI_MESSAGE_BASE + 0x10, PCI_RESOURCES_RELEASED = PCI_MESSAGE_BASE + 0x11, PCI_INVALIDATE_BLOCK = PCI_MESSAGE_BASE + 0x12, PCI_QUERY_PROTOCOL_VERSION = PCI_MESSAGE_BASE + 0x13, PCI_CREATE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x14, PCI_DELETE_INTERRUPT_MESSAGE = PCI_MESSAGE_BASE + 0x15, PCI_RESOURCES_ASSIGNED2 = PCI_MESSAGE_BASE + 0x16, PCI_CREATE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x17, PCI_DELETE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x18, /* unused */ PCI_BUS_RELATIONS2 = PCI_MESSAGE_BASE + 0x19, PCI_RESOURCES_ASSIGNED3 = PCI_MESSAGE_BASE + 0x1A, PCI_CREATE_INTERRUPT_MESSAGE3 = PCI_MESSAGE_BASE + 0x1B, PCI_MESSAGE_MAXIMUM }; #define STATUS_REVISION_MISMATCH 0xC0000059 /* * Structures defining the virtual PCI Express protocol. */ union pci_version { struct { uint16_t minor_version; uint16_t major_version; } parts; uint32_t version; } __packed; /* * This representation is the one used in Windows, which is * what is expected when sending this back and forth with * the Hyper-V parent partition. */ union win_slot_encoding { struct { uint32_t slot:5; uint32_t func:3; uint32_t reserved:24; } bits; uint32_t val; } __packed; struct pci_func_desc { uint16_t v_id; /* vendor ID */ uint16_t d_id; /* device ID */ uint8_t rev; uint8_t prog_intf; uint8_t subclass; uint8_t base_class; uint32_t subsystem_id; union win_slot_encoding wslot; uint32_t ser; /* serial number */ } __packed; struct pci_func_desc2 { uint16_t v_id; /* vendor ID */ uint16_t d_id; /* device ID */ uint8_t rev; uint8_t prog_intf; uint8_t subclass; uint8_t base_class; uint32_t subsystem_id; union win_slot_encoding wslot; uint32_t ser; /* serial number */ uint32_t flags; uint16_t virtual_numa_node; uint16_t reserved; } __packed; struct hv_msi_desc { uint8_t vector; uint8_t delivery_mode; uint16_t vector_count; uint32_t reserved; uint64_t cpu_mask; } __packed; struct hv_msi_desc3 { uint32_t vector; uint8_t delivery_mode; uint8_t reserved; uint16_t vector_count; uint16_t processor_count; uint16_t processor_array[32]; } __packed; struct tran_int_desc { uint16_t reserved; uint16_t vector_count; uint32_t data; uint64_t address; } __packed; struct pci_message { uint32_t type; } __packed; struct pci_child_message { struct pci_message message_type; union win_slot_encoding wslot; } __packed; struct pci_incoming_message { struct vmbus_chanpkt_hdr hdr; struct pci_message message_type; } __packed; struct pci_response { struct vmbus_chanpkt_hdr hdr; int32_t status; /* negative values are failures */ } __packed; struct pci_packet { void (*completion_func)(void *context, struct pci_response *resp, int resp_packet_size); void *compl_ctxt; struct pci_message message[0]; }; /* * Specific message types supporting the PCI protocol. */ struct pci_version_request { struct pci_message message_type; uint32_t protocol_version; uint32_t reservedz:31; } __packed; struct pci_bus_d0_entry { struct pci_message message_type; uint32_t reserved; uint64_t mmio_base; } __packed; struct pci_bus_relations { struct pci_incoming_message incoming; uint32_t device_count; struct pci_func_desc func[0]; } __packed; struct pci_bus_relations2 { struct pci_incoming_message incoming; uint32_t device_count; struct pci_func_desc2 func[0]; } __packed; #define MAX_NUM_BARS (PCIR_MAX_BAR_0 + 1) struct pci_q_res_req_response { struct vmbus_chanpkt_hdr hdr; int32_t status; /* negative values are failures */ uint32_t probed_bar[MAX_NUM_BARS]; } __packed; struct pci_resources_assigned { struct pci_message message_type; union win_slot_encoding wslot; uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */ uint32_t msi_descriptors; uint32_t reserved[4]; } __packed; struct pci_resources_assigned2 { struct pci_message message_type; union win_slot_encoding wslot; uint8_t memory_range[0x14][6]; /* not used here */ uint32_t msi_descriptor_count; uint8_t reserved[70]; } __packed; struct pci_create_interrupt { struct pci_message message_type; union win_slot_encoding wslot; struct hv_msi_desc int_desc; } __packed; struct pci_create_interrupt3 { struct pci_message message_type; union win_slot_encoding wslot; struct hv_msi_desc3 int_desc; } __packed; struct pci_create_int_response { struct pci_response response; uint32_t reserved; struct tran_int_desc int_desc; } __packed; struct pci_delete_interrupt { struct pci_message message_type; union win_slot_encoding wslot; struct tran_int_desc int_desc; } __packed; struct pci_dev_incoming { struct pci_incoming_message incoming; union win_slot_encoding wslot; } __packed; struct pci_eject_response { struct pci_message message_type; union win_slot_encoding wslot; uint32_t status; } __packed; /* * Driver specific state. */ enum hv_pcibus_state { hv_pcibus_init = 0, hv_pcibus_installed, }; struct hv_pcibus { device_t pcib; device_t pci_bus; struct vmbus_pcib_softc *sc; uint16_t pci_domain; enum hv_pcibus_state state; struct resource *cfg_res; struct completion query_completion, *query_comp; struct mtx config_lock; /* Avoid two threads writing index page */ struct mtx device_list_lock; /* Protect lists below */ uint32_t protocol_version; TAILQ_HEAD(, hv_pci_dev) children; TAILQ_HEAD(, hv_dr_state) dr_list; volatile int detaching; }; struct hv_pcidev_desc { uint16_t v_id; /* vendor ID */ uint16_t d_id; /* device ID */ uint8_t rev; uint8_t prog_intf; uint8_t subclass; uint8_t base_class; uint32_t subsystem_id; union win_slot_encoding wslot; uint32_t ser; /* serial number */ uint32_t flags; uint16_t virtual_numa_node; } __packed; struct hv_pci_dev { TAILQ_ENTRY(hv_pci_dev) link; struct hv_pcidev_desc desc; bool reported_missing; struct hv_pcibus *hbus; struct task eject_task; TAILQ_HEAD(, hv_irq_desc) irq_desc_list; /* * What would be observed if one wrote 0xFFFFFFFF to a BAR and then * read it back, for each of the BAR offsets within config space. */ uint32_t probed_bar[MAX_NUM_BARS]; }; /* * Tracks "Device Relations" messages from the host, which must be both * processed in order. */ struct hv_dr_work { struct task task; struct hv_pcibus *bus; }; struct hv_dr_state { TAILQ_ENTRY(hv_dr_state) link; uint32_t device_count; struct hv_pcidev_desc func[0]; }; struct hv_irq_desc { TAILQ_ENTRY(hv_irq_desc) link; struct tran_int_desc desc; int irq; }; #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) static uint32_t devfn_to_wslot(unsigned int devfn) { union win_slot_encoding wslot; wslot.val = 0; wslot.bits.slot = PCI_SLOT(devfn); wslot.bits.func = PCI_FUNC(devfn); return (wslot.val); } static unsigned int wslot_to_devfn(uint32_t wslot) { union win_slot_encoding encoding; unsigned int slot; unsigned int func; encoding.val = wslot; slot = encoding.bits.slot; func = encoding.bits.func; return (PCI_DEVFN(slot, func)); } struct vmbus_pcib_softc { struct vmbus_channel *chan; void *rx_buf; struct taskqueue *taskq; struct hv_pcibus *hbus; }; /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */ static const struct hyperv_guid g_pass_through_dev_type = { .hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F} }; struct hv_pci_compl { struct completion host_event; int32_t completion_status; }; struct q_res_req_compl { struct completion host_event; struct hv_pci_dev *hpdev; }; struct compose_comp_ctxt { struct hv_pci_compl comp_pkt; struct tran_int_desc int_desc; }; /* * It is possible the device is revoked during initialization. * Check if this happens during wait. * Return: 0 if response arrived, ENODEV if device revoked. */ static int wait_for_response(struct hv_pcibus *hbus, struct completion *c) { do { if (vmbus_chan_is_revoked(hbus->sc->chan)) { device_printf(hbus->pcib, "The device is revoked.\n"); return (ENODEV); } } while (wait_for_completion_timeout(c, hz /10) != 0); return 0; } static void hv_pci_generic_compl(void *context, struct pci_response *resp, int resp_packet_size) { struct hv_pci_compl *comp_pkt = context; if (resp_packet_size >= sizeof(struct pci_response)) comp_pkt->completion_status = resp->status; else comp_pkt->completion_status = -1; complete(&comp_pkt->host_event); } static void q_resource_requirements(void *context, struct pci_response *resp, int resp_packet_size) { struct q_res_req_compl *completion = context; struct pci_q_res_req_response *q_res_req = (struct pci_q_res_req_response *)resp; int i; if (resp->status < 0) { printf("vmbus_pcib: failed to query resource requirements\n"); } else { for (i = 0; i < MAX_NUM_BARS; i++) completion->hpdev->probed_bar[i] = q_res_req->probed_bar[i]; } complete(&completion->host_event); } static void hv_pci_compose_compl(void *context, struct pci_response *resp, int resp_packet_size) { struct compose_comp_ctxt *comp_pkt = context; struct pci_create_int_response *int_resp = (struct pci_create_int_response *)resp; comp_pkt->comp_pkt.completion_status = resp->status; comp_pkt->int_desc = int_resp->int_desc; complete(&comp_pkt->comp_pkt.host_event); } static void hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid) { struct pci_delete_interrupt *int_pkt; struct { struct pci_packet pkt; uint8_t buffer[sizeof(struct pci_delete_interrupt)]; } ctxt; memset(&ctxt, 0, sizeof(ctxt)); int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE; int_pkt->wslot.val = hpdev->desc.wslot.val; int_pkt->int_desc = hid->desc; vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, int_pkt, sizeof(*int_pkt), 0); free(hid, M_DEVBUF); } static void hv_pci_delete_device(struct hv_pci_dev *hpdev) { struct hv_pcibus *hbus = hpdev->hbus; struct hv_irq_desc *hid, *tmp_hid; device_t pci_dev; int devfn; devfn = wslot_to_devfn(hpdev->desc.wslot.val); bus_topo_lock(); pci_dev = pci_find_dbsf(hbus->pci_domain, 0, PCI_SLOT(devfn), PCI_FUNC(devfn)); if (pci_dev) device_delete_child(hbus->pci_bus, pci_dev); bus_topo_unlock(); mtx_lock(&hbus->device_list_lock); TAILQ_REMOVE(&hbus->children, hpdev, link); mtx_unlock(&hbus->device_list_lock); TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) hv_int_desc_free(hpdev, hid); free(hpdev, M_DEVBUF); } static struct hv_pci_dev * new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc) { struct hv_pci_dev *hpdev; struct pci_child_message *res_req; struct q_res_req_compl comp_pkt; struct { struct pci_packet pkt; uint8_t buffer[sizeof(struct pci_child_message)]; } ctxt; int ret; hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO); hpdev->hbus = hbus; TAILQ_INIT(&hpdev->irq_desc_list); init_completion(&comp_pkt.host_event); comp_pkt.hpdev = hpdev; ctxt.pkt.compl_ctxt = &comp_pkt; ctxt.pkt.completion_func = q_resource_requirements; res_req = (struct pci_child_message *)&ctxt.pkt.message; res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS; res_req->wslot.val = desc->wslot.val; ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt); if (ret) goto err; if (wait_for_response(hbus, &comp_pkt.host_event)) goto err; free_completion(&comp_pkt.host_event); hpdev->desc = *desc; mtx_lock(&hbus->device_list_lock); if (TAILQ_EMPTY(&hbus->children)) hbus->pci_domain = desc->ser & 0xFFFF; TAILQ_INSERT_TAIL(&hbus->children, hpdev, link); mtx_unlock(&hbus->device_list_lock); return (hpdev); err: free_completion(&comp_pkt.host_event); free(hpdev, M_DEVBUF); return (NULL); } static int pci_rescan(device_t dev) { return (BUS_RESCAN(dev)); } static void pci_devices_present_work(void *arg, int pending __unused) { struct hv_dr_work *dr_wrk = arg; struct hv_dr_state *dr = NULL; struct hv_pcibus *hbus; uint32_t child_no; bool found; struct hv_pcidev_desc *new_desc; struct hv_pci_dev *hpdev, *tmp_hpdev; struct completion *query_comp; bool need_rescan = false; hbus = dr_wrk->bus; free(dr_wrk, M_DEVBUF); /* Pull this off the queue and process it if it was the last one. */ mtx_lock(&hbus->device_list_lock); while (!TAILQ_EMPTY(&hbus->dr_list)) { dr = TAILQ_FIRST(&hbus->dr_list); TAILQ_REMOVE(&hbus->dr_list, dr, link); /* Throw this away if the list still has stuff in it. */ if (!TAILQ_EMPTY(&hbus->dr_list)) { free(dr, M_DEVBUF); continue; } } mtx_unlock(&hbus->device_list_lock); if (!dr) return; /* First, mark all existing children as reported missing. */ mtx_lock(&hbus->device_list_lock); TAILQ_FOREACH(hpdev, &hbus->children, link) hpdev->reported_missing = true; mtx_unlock(&hbus->device_list_lock); /* Next, add back any reported devices. */ for (child_no = 0; child_no < dr->device_count; child_no++) { found = false; new_desc = &dr->func[child_no]; mtx_lock(&hbus->device_list_lock); TAILQ_FOREACH(hpdev, &hbus->children, link) { if ((hpdev->desc.wslot.val == new_desc->wslot.val) && (hpdev->desc.v_id == new_desc->v_id) && (hpdev->desc.d_id == new_desc->d_id) && (hpdev->desc.ser == new_desc->ser)) { hpdev->reported_missing = false; found = true; break; } } mtx_unlock(&hbus->device_list_lock); if (!found) { if (!need_rescan) need_rescan = true; hpdev = new_pcichild_device(hbus, new_desc); if (!hpdev) printf("vmbus_pcib: failed to add a child\n"); } } /* Remove missing device(s), if any */ TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) { if (hpdev->reported_missing) hv_pci_delete_device(hpdev); } /* Rescan the bus to find any new device, if necessary. */ if (hbus->state == hv_pcibus_installed && need_rescan) pci_rescan(hbus->pci_bus); /* Wake up hv_pci_query_relations(), if it's waiting. */ query_comp = hbus->query_comp; if (query_comp) { hbus->query_comp = NULL; complete(query_comp); } free(dr, M_DEVBUF); } static struct hv_pci_dev * get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot) { struct hv_pci_dev *hpdev, *ret = NULL; mtx_lock(&hbus->device_list_lock); TAILQ_FOREACH(hpdev, &hbus->children, link) { if (hpdev->desc.wslot.val == wslot) { ret = hpdev; break; } } mtx_unlock(&hbus->device_list_lock); return (ret); } static void hv_pci_devices_present(struct hv_pcibus *hbus, struct pci_bus_relations *relations) { struct hv_dr_state *dr; struct hv_dr_work *dr_wrk; unsigned long dr_size; if (hbus->detaching && relations->device_count > 0) return; dr_size = offsetof(struct hv_dr_state, func) + (sizeof(struct pci_func_desc) * relations->device_count); dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO); dr->device_count = relations->device_count; if (dr->device_count != 0) memcpy(dr->func, relations->func, sizeof(struct hv_pcidev_desc) * dr->device_count); mtx_lock(&hbus->device_list_lock); TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link); mtx_unlock(&hbus->device_list_lock); dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO); dr_wrk->bus = hbus; TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk); taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task); } static void hv_pci_devices_present2(struct hv_pcibus *hbus, struct pci_bus_relations2 *relations) { struct hv_dr_state *dr; struct hv_dr_work *dr_wrk; unsigned long dr_size; if (hbus->detaching && relations->device_count > 0) return; dr_size = offsetof(struct hv_dr_state, func) + (sizeof(struct pci_func_desc2) * relations->device_count); dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO); dr->device_count = relations->device_count; if (dr->device_count != 0) memcpy(dr->func, relations->func, sizeof(struct pci_func_desc2) * dr->device_count); mtx_lock(&hbus->device_list_lock); TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link); mtx_unlock(&hbus->device_list_lock); dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO); dr_wrk->bus = hbus; TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk); taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task); } static void hv_eject_device_work(void *arg, int pending __unused) { struct hv_pci_dev *hpdev = arg; union win_slot_encoding wslot = hpdev->desc.wslot; struct hv_pcibus *hbus = hpdev->hbus; struct pci_eject_response *eject_pkt; struct { struct pci_packet pkt; uint8_t buffer[sizeof(struct pci_eject_response)]; } ctxt; hv_pci_delete_device(hpdev); memset(&ctxt, 0, sizeof(ctxt)); eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message; eject_pkt->message_type.type = PCI_EJECTION_COMPLETE; eject_pkt->wslot.val = wslot.val; vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, eject_pkt, sizeof(*eject_pkt), 0); } static void hv_pci_eject_device(struct hv_pci_dev *hpdev) { struct hv_pcibus *hbus = hpdev->hbus; struct taskqueue *taskq; if (hbus->detaching) return; /* * Push this task into the same taskqueue on which * vmbus_pcib_attach() runs, so we're sure this task can't run * concurrently with vmbus_pcib_attach(). */ TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev); taskq = vmbus_chan_mgmt_tq(hbus->sc->chan); taskqueue_enqueue(taskq, &hpdev->eject_task); } #define PCIB_PACKET_SIZE 0x100 static void vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg) { struct vmbus_pcib_softc *sc = arg; struct hv_pcibus *hbus = sc->hbus; void *buffer; int bufferlen = PCIB_PACKET_SIZE; struct pci_packet *comp_packet; struct pci_response *response; struct pci_incoming_message *new_msg; struct pci_bus_relations *bus_rel; struct pci_bus_relations2 *bus_rel2; struct pci_dev_incoming *dev_msg; struct hv_pci_dev *hpdev; buffer = sc->rx_buf; do { struct vmbus_chanpkt_hdr *pkt = buffer; uint32_t bytes_rxed; int ret; bytes_rxed = bufferlen; ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed); if (ret == ENOBUFS) { /* Handle large packet */ if (bufferlen > PCIB_PACKET_SIZE) { free(buffer, M_DEVBUF); buffer = NULL; } /* alloc new buffer */ buffer = malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO); bufferlen = bytes_rxed; continue; } if (ret != 0) { /* ignore EIO or EAGAIN */ break; } if (bytes_rxed <= sizeof(struct pci_response)) continue; switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: comp_packet = (struct pci_packet *)(uintptr_t)pkt->cph_xactid; response = (struct pci_response *)pkt; comp_packet->completion_func(comp_packet->compl_ctxt, response, bytes_rxed); break; case VMBUS_CHANPKT_TYPE_INBAND: new_msg = (struct pci_incoming_message *)buffer; switch (new_msg->message_type.type) { case PCI_BUS_RELATIONS: bus_rel = (struct pci_bus_relations *)buffer; if (bus_rel->device_count == 0) break; if (bytes_rxed < offsetof(struct pci_bus_relations, func) + (sizeof(struct pci_func_desc) * (bus_rel->device_count))) break; hv_pci_devices_present(hbus, bus_rel); break; case PCI_BUS_RELATIONS2: bus_rel2 = (struct pci_bus_relations2 *)buffer; if (bus_rel2->device_count == 0) break; if (bytes_rxed < offsetof(struct pci_bus_relations2, func) + (sizeof(struct pci_func_desc2) * (bus_rel2->device_count))) break; hv_pci_devices_present2(hbus, bus_rel2); case PCI_EJECT: dev_msg = (struct pci_dev_incoming *)buffer; hpdev = get_pcichild_wslot(hbus, dev_msg->wslot.val); if (hpdev) hv_pci_eject_device(hpdev); break; default: printf("vmbus_pcib: Unknown msg type 0x%x\n", new_msg->message_type.type); break; } break; default: printf("vmbus_pcib: Unknown VMBus msg type %hd\n", pkt->cph_type); break; } } while (1); if (bufferlen > PCIB_PACKET_SIZE) free(buffer, M_DEVBUF); } static int hv_pci_protocol_negotiation(struct hv_pcibus *hbus, enum pci_protocol_version_t version[], int num_version) { struct pci_version_request *version_req; struct hv_pci_compl comp_pkt; struct { struct pci_packet pkt; uint8_t buffer[sizeof(struct pci_version_request)]; } ctxt; int ret; int i; init_completion(&comp_pkt.host_event); ctxt.pkt.completion_func = hv_pci_generic_compl; ctxt.pkt.compl_ctxt = &comp_pkt; version_req = (struct pci_version_request *)&ctxt.pkt.message; version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; for(i=0; i< num_version; i++) { version_req->protocol_version = version[i]; ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, version_req, sizeof(*version_req), (uint64_t)(uintptr_t)&ctxt.pkt); if (!ret) ret = wait_for_response(hbus, &comp_pkt.host_event); if (ret) { device_printf(hbus->pcib, "vmbus_pcib failed to request version: %d\n", ret); goto out; } if (comp_pkt.completion_status >= 0) { hbus->protocol_version = version[i]; device_printf(hbus->pcib, "PCI VMBus using version 0x%x\n", hbus->protocol_version); ret = 0; goto out; } if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) { device_printf(hbus->pcib, "vmbus_pcib version negotiation failed: %x\n", comp_pkt.completion_status); ret = EPROTO; goto out; } reinit_completion(&comp_pkt.host_event); } device_printf(hbus->pcib, "PCI pass-trhpugh VSP failed to find supported version\n"); out: free_completion(&comp_pkt.host_event); return (ret); } /* Ask the host to send along the list of child devices */ static int hv_pci_query_relations(struct hv_pcibus *hbus) { struct pci_message message; int ret; message.type = PCI_QUERY_BUS_RELATIONS; ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, &message, sizeof(message), 0); return (ret); } static int hv_pci_enter_d0(struct hv_pcibus *hbus) { struct pci_bus_d0_entry *d0_entry; struct hv_pci_compl comp_pkt; struct { struct pci_packet pkt; uint8_t buffer[sizeof(struct pci_bus_d0_entry)]; } ctxt; int ret; /* * Tell the host that the bus is ready to use, and moved into the * powered-on state. This includes telling the host which region * of memory-mapped I/O space has been chosen for configuration space * access. */ init_completion(&comp_pkt.host_event); ctxt.pkt.completion_func = hv_pci_generic_compl; ctxt.pkt.compl_ctxt = &comp_pkt; d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message; memset(d0_entry, 0, sizeof(*d0_entry)); d0_entry->message_type.type = PCI_BUS_D0ENTRY; d0_entry->mmio_base = rman_get_start(hbus->cfg_res); ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry), (uint64_t)(uintptr_t)&ctxt.pkt); if (!ret) ret = wait_for_response(hbus, &comp_pkt.host_event); if (ret) goto out; if (comp_pkt.completion_status < 0) { device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n"); ret = EPROTO; } else { ret = 0; } out: free_completion(&comp_pkt.host_event); return (ret); } /* * It looks this is only needed by Windows VM, but let's send the message too * just to make the host happy. */ static int hv_send_resources_allocated(struct hv_pcibus *hbus) { struct pci_resources_assigned *res_assigned; struct pci_resources_assigned2 *res_assigned2; struct hv_pci_compl comp_pkt; struct hv_pci_dev *hpdev; struct pci_packet *pkt; uint32_t wslot; int ret = 0; size_t size_res; size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) ? sizeof(*res_assigned) : sizeof(*res_assigned2); pkt = malloc(sizeof(*pkt) + size_res, M_DEVBUF, M_WAITOK | M_ZERO); for (wslot = 0; wslot < 256; wslot++) { hpdev = get_pcichild_wslot(hbus, wslot); if (!hpdev) continue; init_completion(&comp_pkt.host_event); memset(pkt, 0, sizeof(*pkt) + size_res); pkt->completion_func = hv_pci_generic_compl; pkt->compl_ctxt = &comp_pkt; if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) { res_assigned = (struct pci_resources_assigned *)&pkt->message; res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED; res_assigned->wslot.val = hpdev->desc.wslot.val; } else { res_assigned2 = (struct pci_resources_assigned2 *)&pkt->message; res_assigned2->message_type.type = PCI_RESOURCES_ASSIGNED2; res_assigned2->wslot.val = hpdev->desc.wslot.val; } ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, &pkt->message, size_res, (uint64_t)(uintptr_t)pkt); if (!ret) ret = wait_for_response(hbus, &comp_pkt.host_event); free_completion(&comp_pkt.host_event); if (ret) break; if (comp_pkt.completion_status < 0) { ret = EPROTO; device_printf(hbus->pcib, "failed to send PCI_RESOURCES_ASSIGNED\n"); break; } } free(pkt, M_DEVBUF); return (ret); } static int hv_send_resources_released(struct hv_pcibus *hbus) { struct pci_child_message pkt; struct hv_pci_dev *hpdev; uint32_t wslot; int ret; for (wslot = 0; wslot < 256; wslot++) { hpdev = get_pcichild_wslot(hbus, wslot); if (!hpdev) continue; pkt.message_type.type = PCI_RESOURCES_RELEASED; pkt.wslot.val = hpdev->desc.wslot.val; ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0); if (ret) return (ret); } return (0); } #define hv_cfg_read(x, s) \ static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus, \ bus_size_t offset) \ { \ return (bus_read_##s(bus->cfg_res, offset)); \ } #define hv_cfg_write(x, s) \ static inline void hv_cfg_write_##s(struct hv_pcibus *bus, \ bus_size_t offset, uint##x##_t val) \ { \ return (bus_write_##s(bus->cfg_res, offset, val)); \ } hv_cfg_read(8, 1) hv_cfg_read(16, 2) hv_cfg_read(32, 4) hv_cfg_write(8, 1) hv_cfg_write(16, 2) hv_cfg_write(32, 4) static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, uint32_t *val) { struct hv_pcibus *hbus = hpdev->hbus; bus_size_t addr = CFG_PAGE_OFFSET + where; /* * If the attempt is to read the IDs or the ROM BAR, simulate that. */ if (where + size <= PCIR_COMMAND) { memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size); } else if (where >= PCIR_REVID && where + size <= PCIR_CACHELNSZ) { memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where - PCIR_REVID, size); } else if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_BIOS) { memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where - PCIR_SUBVEND_0, size); } else if (where >= PCIR_BIOS && where + size <= PCIR_CAP_PTR) { /* ROM BARs are unimplemented */ *val = 0; } else if ((where >= PCIR_INTLINE && where + size <= PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) { /* * Interrupt Line and Interrupt PIN are hard-wired to zero * because this front-end only supports message-signaled * interrupts. */ *val = 0; } else if (where + size <= CFG_PAGE_SIZE) { mtx_lock(&hbus->config_lock); /* Choose the function to be read. */ hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); /* Make sure the function was chosen before we start reading.*/ mb(); /* Read from that function's config space. */ switch (size) { case 1: *((uint8_t *)val) = hv_cfg_read_1(hbus, addr); break; case 2: *((uint16_t *)val) = hv_cfg_read_2(hbus, addr); break; default: *((uint32_t *)val) = hv_cfg_read_4(hbus, addr); break; } /* * Make sure the write was done before we release the lock, * allowing consecutive reads/writes. */ mb(); mtx_unlock(&hbus->config_lock); } else { /* Invalid config read: it's unlikely to reach here. */ memset(val, 0, size); } } static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, uint32_t val) { struct hv_pcibus *hbus = hpdev->hbus; bus_size_t addr = CFG_PAGE_OFFSET + where; /* SSIDs and ROM BARs are read-only */ if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR) return; if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) { mtx_lock(&hbus->config_lock); /* Choose the function to be written. */ hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val); /* Make sure the function was chosen before we start writing.*/ wmb(); /* Write to that function's config space. */ switch (size) { case 1: hv_cfg_write_1(hbus, addr, (uint8_t)val); break; case 2: hv_cfg_write_2(hbus, addr, (uint16_t)val); break; default: hv_cfg_write_4(hbus, addr, (uint32_t)val); break; } /* * Make sure the write was done before we release the lock, * allowing consecutive reads/writes. */ mb(); mtx_unlock(&hbus->config_lock); } else { /* Invalid config write: it's unlikely to reach here. */ return; } } /* * The vPCI in some Hyper-V releases do not initialize the last 4 * bit of BAR registers. This could result weird problems causing PCI * code fail to configure BAR correctly. * * Just write all 1's to those BARs whose probed values are not zero. * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate * correctly. */ static void vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus) { struct hv_pci_dev *hpdev; int i; mtx_lock(&hbus->device_list_lock); TAILQ_FOREACH(hpdev, &hbus->children, link) { for (i = 0; i < 6; i++) { /* Ignore empty bar */ if (hpdev->probed_bar[i] == 0) continue; uint32_t bar_val = 0; _hv_pcifront_read_config(hpdev, PCIR_BAR(i), 4, &bar_val); if (hpdev->probed_bar[i] != bar_val) { if (bootverbose) printf("vmbus_pcib: initialize bar %d " "by writing all 1s\n", i); _hv_pcifront_write_config(hpdev, PCIR_BAR(i), 4, 0xffffffff); /* Now write the original value back */ _hv_pcifront_write_config(hpdev, PCIR_BAR(i), 4, bar_val); } } } mtx_unlock(&hbus->device_list_lock); } static void vmbus_pcib_set_detaching(void *arg, int pending __unused) { struct hv_pcibus *hbus = arg; atomic_set_int(&hbus->detaching, 1); } static void vmbus_pcib_pre_detach(struct hv_pcibus *hbus) { struct task task; TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus); /* * Make sure the channel callback won't push any possible new * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq. */ vmbus_chan_run_task(hbus->sc->chan, &task); taskqueue_drain_all(hbus->sc->taskq); } /* * Standard probe entry point. * */ static int vmbus_pcib_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_pass_through_dev_type) == 0) { device_set_desc(dev, "Hyper-V PCI Express Pass Through"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } /* * Standard attach entry point. * */ static int vmbus_pcib_attach(device_t dev) { const int pci_ring_size = (4 * PAGE_SIZE); const struct hyperv_guid *inst_guid; struct vmbus_channel *channel; struct vmbus_pcib_softc *sc; struct hv_pcibus *hbus; int rid = 0; int ret; hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO); hbus->pcib = dev; channel = vmbus_get_channel(dev); inst_guid = vmbus_chan_guid_inst(channel); hbus->pci_domain = inst_guid->hv_guid[9] | (inst_guid->hv_guid[8] << 8); mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF); mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF); TAILQ_INIT(&hbus->children); TAILQ_INIT(&hbus->dr_list); hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH, RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE)); if (!hbus->cfg_res) { device_printf(dev, "failed to get resource for cfg window\n"); ret = ENXIO; goto free_bus; } sc = device_get_softc(dev); sc->chan = channel; sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); sc->hbus = hbus; /* * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT * messages. NB: we can't handle the messages in the channel callback * directly, because the message handlers need to send new messages * to the host and waits for the host's completion messages, which * must also be handled by the channel callback. */ sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK, taskqueue_thread_enqueue, &sc->taskq); taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq"); hbus->sc = sc; init_completion(&hbus->query_completion); hbus->query_comp = &hbus->query_completion; ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size, NULL, 0, vmbus_pcib_on_channel_callback, sc); if (ret) goto free_res; ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions, ARRAY_SIZE(pci_protocol_versions)); if (ret) goto vmbus_close; ret = hv_pci_query_relations(hbus); if (!ret) ret = wait_for_response(hbus, hbus->query_comp); if (ret) goto vmbus_close; ret = hv_pci_enter_d0(hbus); if (ret) goto vmbus_close; ret = hv_send_resources_allocated(hbus); if (ret) goto vmbus_close; vmbus_pcib_prepopulate_bars(hbus); hbus->pci_bus = device_add_child(dev, "pci", DEVICE_UNIT_ANY); if (!hbus->pci_bus) { device_printf(dev, "failed to create pci bus\n"); ret = ENXIO; goto vmbus_close; } bus_generic_attach(dev); hbus->state = hv_pcibus_installed; return (0); vmbus_close: vmbus_pcib_pre_detach(hbus); vmbus_chan_close(sc->chan); free_res: taskqueue_free(sc->taskq); free_completion(&hbus->query_completion); free(sc->rx_buf, M_DEVBUF); bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); free_bus: mtx_destroy(&hbus->device_list_lock); mtx_destroy(&hbus->config_lock); free(hbus, M_DEVBUF); return (ret); } /* * Standard detach entry point */ static int vmbus_pcib_detach(device_t dev) { struct vmbus_pcib_softc *sc = device_get_softc(dev); struct hv_pcibus *hbus = sc->hbus; struct pci_message teardown_packet; struct pci_bus_relations relations; int ret; vmbus_pcib_pre_detach(hbus); if (hbus->state == hv_pcibus_installed) bus_generic_detach(dev); /* Delete any children which might still exist. */ memset(&relations, 0, sizeof(relations)); hv_pci_devices_present(hbus, &relations); ret = hv_send_resources_released(hbus); if (ret) device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n"); teardown_packet.type = PCI_BUS_D0EXIT; ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0, &teardown_packet, sizeof(struct pci_message), 0); if (ret) device_printf(dev, "failed to send PCI_BUS_D0EXIT\n"); taskqueue_drain_all(hbus->sc->taskq); vmbus_chan_close(sc->chan); taskqueue_free(sc->taskq); free_completion(&hbus->query_completion); free(sc->rx_buf, M_DEVBUF); bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res); mtx_destroy(&hbus->device_list_lock); mtx_destroy(&hbus->config_lock); free(hbus, M_DEVBUF); return (0); } static int vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val) { struct vmbus_pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *val = sc->hbus->pci_domain; return (0); case PCIB_IVAR_BUS: /* There is only bus 0. */ *val = 0; return (0); } return (ENOENT); } static int vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val) { return (ENOENT); } static struct resource * vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { unsigned int bar_no; struct hv_pci_dev *hpdev; struct vmbus_pcib_softc *sc = device_get_softc(dev); struct resource *res; unsigned int devfn; if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid, start, end, count, flags)); /* Devices with port I/O BAR are not supported. */ if (type == SYS_RES_IOPORT) return (NULL); if (type == SYS_RES_MEMORY) { devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); if (!hpdev) return (NULL); bar_no = PCI_RID2BAR(*rid); if (bar_no >= MAX_NUM_BARS) return (NULL); /* Make sure a 32-bit BAR gets a 32-bit address */ if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64)) end = ulmin(end, 0xFFFFFFFF); } res = bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags); /* * If this is a request for a specific range, assume it is * correct and pass it up to the parent. */ if (res == NULL && start + count - 1 == end) res = bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags); if (res == NULL) device_printf(dev, "vmbus_pcib_alloc_resource failed\n"); return (res); } static int vmbus_pcib_adjust_resource(device_t dev, device_t child, struct resource *r, rman_res_t start, rman_res_t end) { struct vmbus_pcib_softc *sc = device_get_softc(dev); if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_adjust_bus(sc->hbus->pci_domain, child, r, start, end)); return (bus_generic_adjust_resource(dev, child, r, start, end)); } static int vmbus_pcib_release_resource(device_t dev, device_t child, struct resource *r) { struct vmbus_pcib_softc *sc = device_get_softc(dev); switch (rman_get_type(r)) { case PCI_RES_BUS: return (pci_domain_release_bus(sc->hbus->pci_domain, child, r)); case SYS_RES_IOPORT: return (EINVAL); default: return (bus_generic_release_resource(dev, child, r)); } } static int vmbus_pcib_activate_resource(device_t dev, device_t child, struct resource *r) { struct vmbus_pcib_softc *sc = device_get_softc(dev); if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_activate_bus(sc->hbus->pci_domain, child, r)); return (bus_generic_activate_resource(dev, child, r)); } static int vmbus_pcib_deactivate_resource(device_t dev, device_t child, struct resource *r) { struct vmbus_pcib_softc *sc = device_get_softc(dev); if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_deactivate_bus(sc->hbus->pci_domain, child, r)); return (bus_generic_deactivate_resource(dev, child, r)); } static int vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op, size_t setsize, cpuset_t *cpuset) { return (bus_get_cpus(pcib, op, setsize, cpuset)); } static uint32_t vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { struct vmbus_pcib_softc *sc = device_get_softc(dev); struct hv_pci_dev *hpdev; unsigned int devfn = PCI_DEVFN(slot, func); uint32_t data = 0; KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); if (!hpdev) return (~0); _hv_pcifront_read_config(hpdev, reg, bytes, &data); return (data); } static void vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t data, int bytes) { struct vmbus_pcib_softc *sc = device_get_softc(dev); struct hv_pci_dev *hpdev; unsigned int devfn = PCI_DEVFN(slot, func); KASSERT(bus == 0, ("bus should be 0, but is %u", bus)); hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); if (!hpdev) return; _hv_pcifront_write_config(hpdev, reg, bytes, data); } static int vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin) { /* We only support MSI/MSI-X and don't support INTx interrupt. */ return (PCI_INVALID_IRQ); } static int vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { #if defined(__amd64__) || defined(__i386__) return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount, irqs)); #endif #if defined(__aarch64__) return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount, irqs)); #endif } static int vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) { #if defined(__amd64__) || defined(__i386__) return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs)); #endif #if defined(__aarch64__) return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs)); #endif } static int vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { #if defined(__aarch64__) int ret; #if defined(INTRNG) ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq); return ret; #else return (ENXIO); #endif #else return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq)); #endif /* __aarch64__ */ } static int vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq) { #if defined(__aarch64__) return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq)); #else return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq)); #endif /* __aarch64__ */ } #if defined(__aarch64__) #define MSI_INTEL_ADDR_DEST 0x00000000 #define MSI_INTEL_DATA_DELFIXED 0x0 #endif #if defined(__amd64__) || defined(__i386__) #define MSI_INTEL_ADDR_DEST 0x000ff000 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ #define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED #endif static int vmbus_pcib_map_msi(device_t pcib, device_t child, int irq, uint64_t *addr, uint32_t *data) { unsigned int devfn; struct hv_pci_dev *hpdev; uint64_t v_addr; uint32_t v_data; struct hv_irq_desc *hid, *tmp_hid; unsigned int cpu, vcpu_id; unsigned int vector; struct vmbus_pcib_softc *sc = device_get_softc(pcib); struct compose_comp_ctxt comp; struct { struct pci_packet pkt; union { struct pci_create_interrupt v1; struct pci_create_interrupt3 v3; }int_pkts; } ctxt; int ret; uint32_t size; devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child)); hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn)); if (!hpdev) return (ENOENT); #if defined(__aarch64__) ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq, &v_addr, &v_data); #else ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq, &v_addr, &v_data); #endif if (ret) return (ret); TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) { if (hid->irq == irq) { TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link); hv_int_desc_free(hpdev, hid); break; } } #if defined(__aarch64__) cpu = 0; vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); vector = v_data; #else cpu = apic_cpuid((v_addr & MSI_INTEL_ADDR_DEST) >> 12); vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu); vector = v_data & MSI_INTEL_DATA_INTVEC; #endif if (hpdev->hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4 && vcpu_id > 63) { /* We only support vcpu_id < 64 before vPCI version 1.4 */ device_printf(pcib, "Error: " "vcpu_id %u overflowed on PCI VMBus version 0x%x\n", vcpu_id, hpdev->hbus->protocol_version); return (ENODEV); } init_completion(&comp.comp_pkt.host_event); memset(&ctxt, 0, sizeof(ctxt)); ctxt.pkt.completion_func = hv_pci_compose_compl; ctxt.pkt.compl_ctxt = ∁ switch (hpdev->hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: ctxt.int_pkts.v1.message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val; ctxt.int_pkts.v1.int_desc.vector = vector; ctxt.int_pkts.v1.int_desc.vector_count = 1; ctxt.int_pkts.v1.int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id; size = sizeof(ctxt.int_pkts.v1); break; case PCI_PROTOCOL_VERSION_1_4: ctxt.int_pkts.v3.message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val; ctxt.int_pkts.v3.int_desc.vector = vector; ctxt.int_pkts.v3.int_desc.vector_count = 1; ctxt.int_pkts.v3.int_desc.reserved = 0; ctxt.int_pkts.v3.int_desc.delivery_mode = MSI_INTEL_DATA_DELFIXED; ctxt.int_pkts.v3.int_desc.processor_count = 1; ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id; size = sizeof(ctxt.int_pkts.v3); break; } ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size, (uint64_t)(uintptr_t)&ctxt.pkt); if (ret) { free_completion(&comp.comp_pkt.host_event); return (ret); } wait_for_completion(&comp.comp_pkt.host_event); free_completion(&comp.comp_pkt.host_event); if (comp.comp_pkt.completion_status < 0) { device_printf(pcib, "vmbus_pcib_map_msi completion_status %d\n", comp.comp_pkt.completion_status); return (EPROTO); } *addr = comp.int_desc.address; *data = comp.int_desc.data; hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO); hid->irq = irq; hid->desc = comp.int_desc; TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link); return (0); } static device_method_t vmbus_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_pcib_probe), DEVMETHOD(device_attach, vmbus_pcib_attach), DEVMETHOD(device_detach, vmbus_pcib_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, vmbus_pcib_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_pcib_write_ivar), DEVMETHOD(bus_alloc_resource, vmbus_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, vmbus_pcib_adjust_resource), DEVMETHOD(bus_release_resource, vmbus_pcib_release_resource), DEVMETHOD(bus_activate_resource, vmbus_pcib_activate_resource), DEVMETHOD(bus_deactivate_resource, vmbus_pcib_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_get_cpus, vmbus_pcib_get_cpus), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_maxslots), DEVMETHOD(pcib_read_config, vmbus_pcib_read_config), DEVMETHOD(pcib_write_config, vmbus_pcib_write_config), DEVMETHOD(pcib_route_interrupt, vmbus_pcib_route_intr), DEVMETHOD(pcib_alloc_msi, vmbus_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, vmbus_pcib_release_msi), DEVMETHOD(pcib_alloc_msix, vmbus_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, vmbus_pcib_release_msix), DEVMETHOD(pcib_map_msi, vmbus_pcib_map_msi), DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), DEVMETHOD_END }; DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods, sizeof(struct vmbus_pcib_softc)); DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0); MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1); MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);