xref: /freebsd/sys/dev/hyperv/pcib/vmbus_pcib.c (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 /*-
2  * Copyright (c) 2016-2017 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include "opt_acpi.h"
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/types.h>
33 #include <sys/malloc.h>
34 #include <sys/module.h>
35 #include <sys/kernel.h>
36 #include <sys/queue.h>
37 #include <sys/lock.h>
38 #include <sys/sx.h>
39 #include <sys/smp.h>
40 #include <sys/sysctl.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 #include <sys/mutex.h>
44 #include <sys/errno.h>
45 
46 #include <vm/vm.h>
47 #include <vm/vm_param.h>
48 #include <vm/vm_kern.h>
49 #include <vm/pmap.h>
50 
51 #if defined(__aarch64__)
52 #include <arm64/include/intr.h>
53 #endif
54 #include <machine/atomic.h>
55 #include <machine/bus.h>
56 #include <machine/frame.h>
57 #include <machine/pci_cfgreg.h>
58 #include <machine/resource.h>
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 #include <dev/pci/pcib_private.h>
65 #include "pcib_if.h"
66 #if defined(__i386__) || defined(__amd64__)
67 #include <machine/intr_machdep.h>
68 #include <x86/apicreg.h>
69 #include <x86/apicvar.h>
70 #endif
71 #if defined(__aarch64__)
72 #include <contrib/dev/acpica/include/acpi.h>
73 #include <contrib/dev/acpica/include/accommon.h>
74 #include <dev/acpica/acpivar.h>
75 #include <dev/acpica/acpi_pcibvar.h>
76 #endif
77 #include <dev/hyperv/include/hyperv.h>
78 #include <dev/hyperv/include/vmbus_xact.h>
79 #include <dev/hyperv/vmbus/vmbus_reg.h>
80 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
81 
82 #include "vmbus_if.h"
83 
/*
 * Linux-style completion primitive built on a FreeBSD mutex plus
 * sleep/wakeup.  'done' counts complete() calls not yet consumed by a
 * waiter; 'lock' serializes access to it.
 */
struct completion {
	unsigned int done;
	struct mtx lock;
};
88 
/* Initialize a completion: nothing completed yet (done == 0). */
static void
init_completion(struct completion *c)
{
	memset(c, 0, sizeof(*c));
	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
	c->done = 0;	/* already zero from memset; kept for clarity */
}
/*
 * Reset an already-initialized completion for reuse.  The counter is
 * cleared without taking the lock; callers must ensure no concurrent
 * complete()/wait is in flight.
 */
static void
reinit_completion(struct completion *c)
{
	c->done = 0;
}
/* Destroy the completion's mutex; must be called exactly once per init. */
static void
free_completion(struct completion *c)
{
	mtx_destroy(&c->lock);
}
106 
/* Signal the completion: bump the counter and wake all sleepers on 'c'. */
static void
complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done++;
	mtx_unlock(&c->lock);
	wakeup(c);
}
115 
/* Sleep until the completion has been signaled, then consume one signal. */
static void
wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
	c->done--;
	mtx_unlock(&c->lock);
}
125 
126 /*
127  * Return: 0 if completed, a non-zero value if timed out.
128  */
129 static int
130 wait_for_completion_timeout(struct completion *c, int timeout)
131 {
132 	int ret;
133 
134 	mtx_lock(&c->lock);
135 
136 	if (c->done == 0)
137 		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);
138 
139 	if (c->done > 0) {
140 		c->done--;
141 		ret = 0;
142 	} else {
143 		ret = 1;
144 	}
145 
146 	mtx_unlock(&c->lock);
147 
148 	return (ret);
149 }
150 
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Pack a protocol version as major in the high 16 bits, minor in the low. */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

enum pci_protocol_version_t {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),
};

/* Versions offered to the host, newest first (negotiation order). */
static enum pci_protocol_version_t pci_protocol_versions[] = {
	PCI_PROTOCOL_VERSION_1_4,
	PCI_PROTOCOL_VERSION_1_1,
};

/* 8KB config MMIO window; the second 4KB page is the config data page. */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
167 
168 /*
169  * Message Types
170  */
171 
172 enum pci_message_type {
173 	/*
174 	 * Version 1.1
175 	 */
176 	PCI_MESSAGE_BASE                = 0x42490000,
177 	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
178 	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
179 	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
180 	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
181 	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
182 	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
183 	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
184 	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
185 	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
186 	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
187 	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
188 	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
189 	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
190 	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
191 	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
192 	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
193 	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
194 	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
195 	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
196 	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
197 	PCI_RESOURCES_ASSIGNED2         = PCI_MESSAGE_BASE + 0x16,
198 	PCI_CREATE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x17,
199 	PCI_DELETE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x18, /* unused */
200 	PCI_BUS_RELATIONS2              = PCI_MESSAGE_BASE + 0x19,
201 	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
202 	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
203 	PCI_MESSAGE_MAXIMUM
204 };
205 
206 #define STATUS_REVISION_MISMATCH 0xC0000059
207 
208 /*
209  * Structures defining the virtual PCI Express protocol.
210  */
211 
212 union pci_version {
213 	struct {
214 		uint16_t minor_version;
215 		uint16_t major_version;
216 	} parts;
217 	uint32_t version;
218 } __packed;
219 
220 /*
221  * This representation is the one used in Windows, which is
222  * what is expected when sending this back and forth with
223  * the Hyper-V parent partition.
224  */
225 union win_slot_encoding {
226 	struct {
227 		uint32_t	slot:5;
228 		uint32_t	func:3;
229 		uint32_t	reserved:24;
230 	} bits;
231 	uint32_t val;
232 } __packed;
233 
234 struct pci_func_desc {
235 	uint16_t	v_id;	/* vendor ID */
236 	uint16_t	d_id;	/* device ID */
237 	uint8_t		rev;
238 	uint8_t		prog_intf;
239 	uint8_t		subclass;
240 	uint8_t		base_class;
241 	uint32_t	subsystem_id;
242 	union win_slot_encoding wslot;
243 	uint32_t	ser;	/* serial number */
244 } __packed;
245 
246 struct pci_func_desc2 {
247 	uint16_t	v_id;	/* vendor ID */
248 	uint16_t	d_id;	/* device ID */
249 	uint8_t		rev;
250 	uint8_t		prog_intf;
251 	uint8_t		subclass;
252 	uint8_t		base_class;
253 	uint32_t	subsystem_id;
254 	union		win_slot_encoding wslot;
255 	uint32_t	ser;	/* serial number */
256 	uint32_t	flags;
257 	uint16_t	virtual_numa_node;
258 	uint16_t	reserved;
259 } __packed;
260 
261 
/* MSI description sent with PCI_CREATE_INTERRUPT_MESSAGE (v1.1). */
struct hv_msi_desc {
	uint8_t		vector;
	uint8_t		delivery_mode;
	uint16_t	vector_count;
	uint32_t	reserved;
	uint64_t	cpu_mask;
} __packed;

/* MSI description sent with PCI_CREATE_INTERRUPT_MESSAGE3 (v1.4). */
struct hv_msi_desc3 {
	uint32_t	vector;
	uint8_t		delivery_mode;
	uint8_t		reserved;
	uint16_t	vector_count;
	uint16_t	processor_count;
	uint16_t	processor_array[32];
} __packed;

/* Host-assigned interrupt descriptor, returned on interrupt creation. */
struct tran_int_desc {
	uint16_t	reserved;
	uint16_t	vector_count;
	uint32_t	data;
	uint64_t	address;
} __packed;

struct pci_message {
	uint32_t type;
} __packed;

struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * Local (not wire) wrapper around an outgoing request: the completion
 * callback is invoked from the channel callback when the matching
 * VMBUS_CHANPKT_TYPE_COMP packet arrives.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};
312 
313 /*
314  * Specific message types supporting the PCI protocol.
315  */
316 
317 struct pci_version_request {
318 	struct pci_message message_type;
319 	uint32_t protocol_version;
320 	uint32_t reservedz:31;
321 } __packed;
322 
323 struct pci_bus_d0_entry {
324 	struct pci_message message_type;
325 	uint32_t reserved;
326 	uint64_t mmio_base;
327 } __packed;
328 
329 struct pci_bus_relations {
330 	struct pci_incoming_message incoming;
331 	uint32_t device_count;
332 	struct pci_func_desc func[0];
333 } __packed;
334 
335 struct pci_bus_relations2 {
336 	struct pci_incoming_message incoming;
337 	uint32_t device_count;
338 	struct pci_func_desc2 func[0];
339 } __packed;
340 
341 #define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
342 struct pci_q_res_req_response {
343 	struct vmbus_chanpkt_hdr hdr;
344 	int32_t status; /* negative values are failures */
345 	uint32_t probed_bar[MAX_NUM_BARS];
346 } __packed;
347 
348 struct pci_resources_assigned {
349 	struct pci_message message_type;
350 	union win_slot_encoding wslot;
351 	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
352 	uint32_t msi_descriptors;
353 	uint32_t reserved[4];
354 } __packed;
355 
356 struct pci_resources_assigned2 {
357 	struct pci_message message_type;
358 	union win_slot_encoding wslot;
359 	uint8_t memory_range[0x14][6];   /* not used here */
360 	uint32_t msi_descriptor_count;
361 	uint8_t reserved[70];
362 } __packed;
363 
364 struct pci_create_interrupt {
365 	struct pci_message message_type;
366 	union win_slot_encoding wslot;
367 	struct hv_msi_desc int_desc;
368 } __packed;
369 
370 struct pci_create_interrupt3 {
371 	struct pci_message message_type;
372 	union win_slot_encoding wslot;
373 	struct hv_msi_desc3 int_desc;
374 } __packed;
375 
376 struct pci_create_int_response {
377 	struct pci_response response;
378 	uint32_t reserved;
379 	struct tran_int_desc int_desc;
380 } __packed;
381 
382 struct pci_delete_interrupt {
383 	struct pci_message message_type;
384 	union win_slot_encoding wslot;
385 	struct tran_int_desc int_desc;
386 } __packed;
387 
388 struct pci_dev_incoming {
389 	struct pci_incoming_message incoming;
390 	union win_slot_encoding wslot;
391 } __packed;
392 
393 struct pci_eject_response {
394 	struct pci_message message_type;
395 	union win_slot_encoding wslot;
396 	uint32_t status;
397 } __packed;
398 
399 /*
400  * Driver specific state.
401  */
402 
403 enum hv_pcibus_state {
404 	hv_pcibus_init = 0,
405 	hv_pcibus_installed,
406 };
407 
408 struct hv_pcibus {
409 	device_t pcib;
410 	device_t pci_bus;
411 	struct vmbus_pcib_softc *sc;
412 
413 	uint16_t pci_domain;
414 
415 	enum hv_pcibus_state state;
416 
417 	struct resource *cfg_res;
418 
419 	struct completion query_completion, *query_comp;
420 
421 	struct mtx config_lock; /* Avoid two threads writing index page */
422 	struct mtx device_list_lock;    /* Protect lists below */
423 	uint32_t protocol_version;
424 	TAILQ_HEAD(, hv_pci_dev) children;
425 	TAILQ_HEAD(, hv_dr_state) dr_list;
426 
427 	volatile int detaching;
428 };
429 
/*
 * Driver-internal, protocol-version-independent view of a function
 * description (superset of the v1.1 wire format's fields).
 */
struct hv_pcidev_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
	uint32_t flags;
	uint16_t virtual_numa_node;
} __packed;

/* One pass-through child device on the virtual bus. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct hv_pcidev_desc desc;

	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be both
 * processed in order.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;
};

struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;
	uint32_t device_count;
	struct hv_pcidev_desc func[0];
};

/* Tracks one host-created interrupt so it can be torn down later. */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;
	struct tran_int_desc desc;
	int irq;
};

#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)         ((devfn) & 0x07)
487 
488 static uint32_t
489 devfn_to_wslot(unsigned int devfn)
490 {
491 	union win_slot_encoding wslot;
492 
493 	wslot.val = 0;
494 	wslot.bits.slot = PCI_SLOT(devfn);
495 	wslot.bits.func = PCI_FUNC(devfn);
496 
497 	return (wslot.val);
498 }
499 
500 static unsigned int
501 wslot_to_devfn(uint32_t wslot)
502 {
503 	union win_slot_encoding encoding;
504 	unsigned int slot;
505 	unsigned int func;
506 
507 	encoding.val = wslot;
508 
509 	slot = encoding.bits.slot;
510 	func = encoding.bits.func;
511 
512 	return (PCI_DEVFN(slot, func));
513 }
514 
/* Per-instance softc: the VMBus channel and its receive scratch buffer. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;
	void *rx_buf;

	struct taskqueue	*taskq;

	struct hv_pcibus	*hbus;
};

/* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};

/* Generic request context: completion plus the host's status. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Context for PCI_QUERY_RESOURCE_REQUIREMENTS requests. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};

/* Context for interrupt-creation requests. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};
544 
545 /*
546  * It is possible the device is revoked during initialization.
547  * Check if this happens during wait.
548  * Return: 0 if response arrived, ENODEV if device revoked.
549  */
550 static int
551 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
552 {
553 	do {
554 		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
555 			device_printf(hbus->pcib,
556 			    "The device is revoked.\n");
557 			return (ENODEV);
558 		}
559 	} while (wait_for_completion_timeout(c, hz /10) != 0);
560 
561 	return 0;
562 }
563 
564 static void
565 hv_pci_generic_compl(void *context, struct pci_response *resp,
566     int resp_packet_size)
567 {
568 	struct hv_pci_compl *comp_pkt = context;
569 
570 	if (resp_packet_size >= sizeof(struct pci_response))
571 		comp_pkt->completion_status = resp->status;
572 	else
573 		comp_pkt->completion_status = -1;
574 
575 	complete(&comp_pkt->host_event);
576 }
577 
578 static void
579 q_resource_requirements(void *context, struct pci_response *resp,
580     int resp_packet_size)
581 {
582 	struct q_res_req_compl *completion = context;
583 	struct pci_q_res_req_response *q_res_req =
584 	    (struct pci_q_res_req_response *)resp;
585 	int i;
586 
587 	if (resp->status < 0) {
588 		printf("vmbus_pcib: failed to query resource requirements\n");
589 	} else {
590 		for (i = 0; i < MAX_NUM_BARS; i++)
591 			completion->hpdev->probed_bar[i] =
592 			    q_res_req->probed_bar[i];
593 	}
594 
595 	complete(&completion->host_event);
596 }
597 
/*
 * Completion callback for interrupt-creation requests: capture both the
 * status and the interrupt descriptor assigned by the host, then wake
 * the waiter.
 */
static void
hv_pci_compose_compl(void *context, struct pci_response *resp,
    int resp_packet_size)
{
	struct compose_comp_ctxt *comp_pkt = context;
	struct pci_create_int_response *int_resp =
	    (struct pci_create_int_response *)resp;

	comp_pkt->comp_pkt.completion_status = resp->status;
	comp_pkt->int_desc = int_resp->int_desc;
	complete(&comp_pkt->comp_pkt.host_event);
}
610 
611 static void
612 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
613 {
614 	struct pci_delete_interrupt *int_pkt;
615 	struct {
616 		struct pci_packet pkt;
617 		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
618 	} ctxt;
619 
620 	memset(&ctxt, 0, sizeof(ctxt));
621 	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
622 	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
623 	int_pkt->wslot.val = hpdev->desc.wslot.val;
624 	int_pkt->int_desc = hid->desc;
625 
626 	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
627 	    int_pkt, sizeof(*int_pkt), 0);
628 
629 	free(hid, M_DEVBUF);
630 }
631 
/*
 * Tear down one child: delete its newbus device (if it was attached),
 * unlink it from the bus, free all of its host-created interrupts, and
 * finally free the structure itself.  'hpdev' is invalid on return.
 */
static void
hv_pci_delete_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct hv_irq_desc *hid, *tmp_hid;
	device_t pci_dev;
	int devfn;

	devfn = wslot_to_devfn(hpdev->desc.wslot.val);

	/* Newbus topology changes require the topo lock. */
	bus_topo_lock();

	pci_dev = pci_find_dbsf(hbus->pci_domain,
	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
	if (pci_dev)
		device_delete_child(hbus->pci_bus, pci_dev);

	bus_topo_unlock();

	mtx_lock(&hbus->device_list_lock);
	TAILQ_REMOVE(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);

	/* Release every interrupt the host created for this device. */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
		hv_int_desc_free(hpdev, hid);

	free(hpdev, M_DEVBUF);
}
660 
/*
 * Allocate and register a new child for 'desc': query its resource
 * requirements (probed BARs) from the host and link it onto the bus's
 * children list.  The low 16 bits of the first child's serial number
 * become the PCI domain for the bus.
 *
 * Returns the new child, or NULL on send/response failure.
 */
static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_child_message)];
	} ctxt;
	int ret;

	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
	hpdev->hbus = hbus;

	TAILQ_INIT(&hpdev->irq_desc_list);

	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;

	ctxt.pkt.compl_ctxt = &comp_pkt;
	ctxt.pkt.completion_func = q_resource_requirements;

	res_req = (struct pci_child_message *)&ctxt.pkt.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.val = desc->wslot.val;

	ret = vmbus_chan_send(hbus->sc->chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto err;

	if (wait_for_response(hbus, &comp_pkt.host_event))
		goto err;

	free_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;

	mtx_lock(&hbus->device_list_lock);
	/* The first child's serial number defines the PCI domain. */
	if (TAILQ_EMPTY(&hbus->children))
		hbus->pci_domain = desc->ser & 0xFFFF;
	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);
	return (hpdev);
err:
	free_completion(&comp_pkt.host_event);
	free(hpdev, M_DEVBUF);
	return (NULL);
}
712 
/* Rescan the PCI bus so newly-arrived children get attached. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}
718 
/*
 * Taskqueue handler for a "Device Relations" update: reconcile the
 * driver's children list against the newest snapshot reported by the
 * host.  Children absent from the snapshot are deleted; new entries are
 * added and, once the bus is installed, a bus rescan attaches them.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct hv_pcidev_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	/* Another invocation of this handler already consumed the queue. */
	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/*
	 * Remove missing device(s), if any.
	 * NOTE(review): the children list is walked here without
	 * device_list_lock — presumably safe because all list mutation
	 * happens on this taskqueue; confirm against other callers.
	 */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}
806 
807 static struct hv_pci_dev *
808 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
809 {
810 	struct hv_pci_dev *hpdev, *ret = NULL;
811 
812 	mtx_lock(&hbus->device_list_lock);
813 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
814 		if (hpdev->desc.wslot.val == wslot) {
815 			ret = hpdev;
816 			break;
817 		}
818 	}
819 	mtx_unlock(&hbus->device_list_lock);
820 
821 	return (ret);
822 }
823 
824 static void
825 hv_pci_devices_present(struct hv_pcibus *hbus,
826     struct pci_bus_relations *relations)
827 {
828 	struct hv_dr_state *dr;
829 	struct hv_dr_work *dr_wrk;
830 	unsigned long dr_size;
831 
832 	if (hbus->detaching && relations->device_count > 0)
833 		return;
834 
835 	dr_size = offsetof(struct hv_dr_state, func) +
836 	    (sizeof(struct pci_func_desc) * relations->device_count);
837 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
838 
839 	dr->device_count = relations->device_count;
840 	if (dr->device_count != 0)
841 		memcpy(dr->func, relations->func,
842 		    sizeof(struct hv_pcidev_desc) * dr->device_count);
843 
844 	mtx_lock(&hbus->device_list_lock);
845 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
846 	mtx_unlock(&hbus->device_list_lock);
847 
848 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
849 	dr_wrk->bus = hbus;
850 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
851 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
852 }
853 
854 static void
855 hv_pci_devices_present2(struct hv_pcibus *hbus,
856     struct pci_bus_relations2 *relations)
857 {
858 	struct hv_dr_state *dr;
859 	struct hv_dr_work *dr_wrk;
860 	unsigned long dr_size;
861 
862 	if (hbus->detaching && relations->device_count > 0)
863 		return;
864 
865 	dr_size = offsetof(struct hv_dr_state, func) +
866 	    (sizeof(struct pci_func_desc2) * relations->device_count);
867 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
868 
869 	dr->device_count = relations->device_count;
870 	if (dr->device_count != 0)
871 		memcpy(dr->func, relations->func,
872 		    sizeof(struct pci_func_desc2) * dr->device_count);
873 
874 	mtx_lock(&hbus->device_list_lock);
875 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
876 	mtx_unlock(&hbus->device_list_lock);
877 
878 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
879 	dr_wrk->bus = hbus;
880 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
881 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
882 }
883 
884 static void
885 hv_eject_device_work(void *arg, int pending __unused)
886 {
887 	struct hv_pci_dev *hpdev = arg;
888 	union win_slot_encoding wslot = hpdev->desc.wslot;
889 	struct hv_pcibus *hbus = hpdev->hbus;
890 	struct pci_eject_response *eject_pkt;
891 	struct {
892 		struct pci_packet pkt;
893 		uint8_t buffer[sizeof(struct pci_eject_response)];
894 	} ctxt;
895 
896 	hv_pci_delete_device(hpdev);
897 
898 	memset(&ctxt, 0, sizeof(ctxt));
899 	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
900 	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
901 	eject_pkt->wslot.val = wslot.val;
902 	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
903 	    eject_pkt, sizeof(*eject_pkt), 0);
904 }
905 
906 static void
907 hv_pci_eject_device(struct hv_pci_dev *hpdev)
908 {
909 	struct hv_pcibus *hbus = hpdev->hbus;
910 	struct taskqueue *taskq;
911 
912 	if (hbus->detaching)
913 		return;
914 
915 	/*
916 	 * Push this task into the same taskqueue on which
917 	 * vmbus_pcib_attach() runs, so we're sure this task can't run
918 	 * concurrently with vmbus_pcib_attach().
919 	 */
920 	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
921 	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
922 	taskqueue_enqueue(taskq, &hpdev->eject_task);
923 }
924 
925 #define PCIB_PACKET_SIZE	0x100
926 
927 static void
928 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
929 {
930 	struct vmbus_pcib_softc *sc = arg;
931 	struct hv_pcibus *hbus = sc->hbus;
932 
933 	void *buffer;
934 	int bufferlen = PCIB_PACKET_SIZE;
935 
936 	struct pci_packet *comp_packet;
937 	struct pci_response *response;
938 	struct pci_incoming_message *new_msg;
939 	struct pci_bus_relations *bus_rel;
940 	struct pci_bus_relations2 *bus_rel2;
941 	struct pci_dev_incoming *dev_msg;
942 	struct hv_pci_dev *hpdev;
943 
944 	buffer = sc->rx_buf;
945 	do {
946 		struct vmbus_chanpkt_hdr *pkt = buffer;
947 		uint32_t bytes_rxed;
948 		int ret;
949 
950 		bytes_rxed = bufferlen;
951 		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
952 
953 		if (ret == ENOBUFS) {
954 			/* Handle large packet */
955 			if (bufferlen > PCIB_PACKET_SIZE) {
956 				free(buffer, M_DEVBUF);
957 				buffer = NULL;
958 			}
959 
960 			/* alloc new buffer */
961 			buffer =
962 			    malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
963 			bufferlen = bytes_rxed;
964 
965 			continue;
966 		}
967 
968 		if (ret != 0) {
969 			/* ignore EIO or EAGAIN */
970 			break;
971 		}
972 
973 		if (bytes_rxed <= sizeof(struct pci_response))
974 			continue;
975 
976 		switch (pkt->cph_type) {
977 		case VMBUS_CHANPKT_TYPE_COMP:
978 			comp_packet =
979 			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
980 			response = (struct pci_response *)pkt;
981 			comp_packet->completion_func(comp_packet->compl_ctxt,
982 			    response, bytes_rxed);
983 			break;
984 		case VMBUS_CHANPKT_TYPE_INBAND:
985 			new_msg = (struct pci_incoming_message *)buffer;
986 
987 			switch (new_msg->message_type.type) {
988 			case PCI_BUS_RELATIONS:
989 				bus_rel = (struct pci_bus_relations *)buffer;
990 
991 				if (bus_rel->device_count == 0)
992 					break;
993 
994 				if (bytes_rxed <
995 				    offsetof(struct pci_bus_relations, func) +
996 				        (sizeof(struct pci_func_desc) *
997 				            (bus_rel->device_count)))
998 					break;
999 
1000 				hv_pci_devices_present(hbus, bus_rel);
1001 				break;
1002 
1003 			case PCI_BUS_RELATIONS2:
1004 				bus_rel2 = (struct pci_bus_relations2 *)buffer;
1005 
1006 				if (bus_rel2->device_count == 0)
1007 					break;
1008 
1009 				if (bytes_rxed <
1010 				    offsetof(struct pci_bus_relations2, func) +
1011 				    (sizeof(struct pci_func_desc2) *
1012 				    (bus_rel2->device_count)))
1013 					break;
1014 
1015 				hv_pci_devices_present2(hbus, bus_rel2);
1016 
1017 			case PCI_EJECT:
1018 				dev_msg = (struct pci_dev_incoming *)buffer;
1019 				hpdev = get_pcichild_wslot(hbus,
1020 				    dev_msg->wslot.val);
1021 
1022 				if (hpdev)
1023 					hv_pci_eject_device(hpdev);
1024 
1025 				break;
1026 			default:
1027 				printf("vmbus_pcib: Unknown msg type 0x%x\n",
1028 				    new_msg->message_type.type);
1029 				break;
1030 			}
1031 			break;
1032 		default:
1033 			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1034 			    pkt->cph_type);
1035 			break;
1036 		}
1037 	} while (1);
1038 
1039 	if (bufferlen > PCIB_PACKET_SIZE)
1040 		free(buffer, M_DEVBUF);
1041 }
1042 
1043 static int
1044 hv_pci_protocol_negotiation(struct hv_pcibus *hbus,
1045     enum pci_protocol_version_t version[],
1046     int num_version)
1047 {
1048 	struct pci_version_request *version_req;
1049 	struct hv_pci_compl comp_pkt;
1050 	struct {
1051 		struct pci_packet pkt;
1052 		uint8_t buffer[sizeof(struct pci_version_request)];
1053 	} ctxt;
1054 	int ret;
1055 	int i;
1056 
1057 	init_completion(&comp_pkt.host_event);
1058 
1059 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1060 	ctxt.pkt.compl_ctxt = &comp_pkt;
1061 	version_req = (struct pci_version_request *)&ctxt.pkt.message;
1062 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1063 
1064 	for(i=0; i< num_version; i++) {
1065 		version_req->protocol_version = version[i];
1066 		ret = vmbus_chan_send(hbus->sc->chan,
1067 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1068 		    version_req, sizeof(*version_req),
1069 		    (uint64_t)(uintptr_t)&ctxt.pkt);
1070 		if (!ret)
1071 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1072 
1073 		if (ret) {
1074 			device_printf(hbus->pcib,
1075 				"vmbus_pcib failed to request version: %d\n",
1076 				ret);
1077 			goto out;
1078 		}
1079 
1080 		if (comp_pkt.completion_status >= 0) {
1081 			hbus->protocol_version = version[i];
1082 			device_printf(hbus->pcib,
1083 				"PCI VMBus using version 0x%x\n",
1084 				hbus->protocol_version);
1085 			ret = 0;
1086 			goto out;
1087 		}
1088 
1089 		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
1090 			device_printf(hbus->pcib,
1091 				"vmbus_pcib version negotiation failed: %x\n",
1092 				comp_pkt.completion_status);
1093 			ret = EPROTO;
1094 			goto out;
1095 		}
1096 		reinit_completion(&comp_pkt.host_event);
1097 	}
1098 
1099 	device_printf(hbus->pcib,
1100 		"PCI pass-trhpugh VSP failed to find supported version\n");
1101 out:
1102 	free_completion(&comp_pkt.host_event);
1103 	return (ret);
1104 }
1105 
1106 /* Ask the host to send along the list of child devices */
1107 static int
1108 hv_pci_query_relations(struct hv_pcibus *hbus)
1109 {
1110 	struct pci_message message;
1111 	int ret;
1112 
1113 	message.type = PCI_QUERY_BUS_RELATIONS;
1114 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1115 	    &message, sizeof(message), 0);
1116 	return (ret);
1117 }
1118 
1119 static int
1120 hv_pci_enter_d0(struct hv_pcibus *hbus)
1121 {
1122 	struct pci_bus_d0_entry *d0_entry;
1123 	struct hv_pci_compl comp_pkt;
1124 	struct {
1125 		struct pci_packet pkt;
1126 		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1127 	} ctxt;
1128 	int ret;
1129 
1130 	/*
1131 	 * Tell the host that the bus is ready to use, and moved into the
1132 	 * powered-on state.  This includes telling the host which region
1133 	 * of memory-mapped I/O space has been chosen for configuration space
1134 	 * access.
1135 	 */
1136 	init_completion(&comp_pkt.host_event);
1137 
1138 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1139 	ctxt.pkt.compl_ctxt = &comp_pkt;
1140 
1141 	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1142 	memset(d0_entry, 0, sizeof(*d0_entry));
1143 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1144 	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1145 
1146 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1147 	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1148 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1149 	if (!ret)
1150 		ret = wait_for_response(hbus, &comp_pkt.host_event);
1151 
1152 	if (ret)
1153 		goto out;
1154 
1155 	if (comp_pkt.completion_status < 0) {
1156 		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1157 		ret = EPROTO;
1158 	} else {
1159 		ret = 0;
1160 	}
1161 
1162 out:
1163 	free_completion(&comp_pkt.host_event);
1164 	return (ret);
1165 }
1166 
1167 /*
1168  * It looks this is only needed by Windows VM, but let's send the message too
1169  * just to make the host happy.
1170  */
1171 static int
1172 hv_send_resources_allocated(struct hv_pcibus *hbus)
1173 {
1174 	struct pci_resources_assigned *res_assigned;
1175 	struct pci_resources_assigned2 *res_assigned2;
1176 	struct hv_pci_compl comp_pkt;
1177 	struct hv_pci_dev *hpdev;
1178 	struct pci_packet *pkt;
1179 	uint32_t wslot;
1180 	int ret = 0;
1181 	size_t size_res;
1182 
1183 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4)
1184 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
1185 	pkt = malloc(sizeof(*pkt) + size_res,
1186 	    M_DEVBUF, M_WAITOK | M_ZERO);
1187 
1188 	for (wslot = 0; wslot < 256; wslot++) {
1189 		hpdev = get_pcichild_wslot(hbus, wslot);
1190 		if (!hpdev)
1191 			continue;
1192 
1193 		init_completion(&comp_pkt.host_event);
1194 
1195 		memset(pkt, 0, sizeof(*pkt) + size_res);
1196 		pkt->completion_func = hv_pci_generic_compl;
1197 		pkt->compl_ctxt = &comp_pkt;
1198 
1199 		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) {
1200 			res_assigned =
1201 			    (struct pci_resources_assigned *)&pkt->message;
1202 			res_assigned->message_type.type =
1203 			    PCI_RESOURCES_ASSIGNED;
1204 			res_assigned->wslot.val = hpdev->desc.wslot.val;
1205 		} else {
1206 			res_assigned2 =
1207 			    (struct pci_resources_assigned2 *)&pkt->message;
1208 			res_assigned2->message_type.type =
1209 			    PCI_RESOURCES_ASSIGNED2;
1210 			res_assigned2->wslot.val = hpdev->desc.wslot.val;
1211 		}
1212 
1213 		ret = vmbus_chan_send(hbus->sc->chan,
1214 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1215 		    &pkt->message, size_res,
1216 		    (uint64_t)(uintptr_t)pkt);
1217 		if (!ret)
1218 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1219 
1220 		free_completion(&comp_pkt.host_event);
1221 
1222 		if (ret)
1223 			break;
1224 
1225 		if (comp_pkt.completion_status < 0) {
1226 			ret = EPROTO;
1227 			device_printf(hbus->pcib,
1228 			    "failed to send PCI_RESOURCES_ASSIGNED\n");
1229 			break;
1230 		}
1231 	}
1232 
1233 	free(pkt, M_DEVBUF);
1234 	return (ret);
1235 }
1236 
1237 static int
1238 hv_send_resources_released(struct hv_pcibus *hbus)
1239 {
1240 	struct pci_child_message pkt;
1241 	struct hv_pci_dev *hpdev;
1242 	uint32_t wslot;
1243 	int ret;
1244 
1245 	for (wslot = 0; wslot < 256; wslot++) {
1246 		hpdev = get_pcichild_wslot(hbus, wslot);
1247 		if (!hpdev)
1248 			continue;
1249 
1250 		pkt.message_type.type = PCI_RESOURCES_RELEASED;
1251 		pkt.wslot.val = hpdev->desc.wslot.val;
1252 
1253 		ret = vmbus_chan_send(hbus->sc->chan,
1254 		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1255 		if (ret)
1256 			return (ret);
1257 	}
1258 
1259 	return (0);
1260 }
1261 
/*
 * Generate typed accessors for the config-space MMIO window
 * (bus->cfg_res): hv_cfg_read_{1,2,4}() and hv_cfg_write_{1,2,4}(),
 * thin wrappers around bus_read_N()/bus_write_N().
 */
#define hv_cfg_read(x, s)						\
static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
    bus_size_t offset)							\
{									\
	return (bus_read_##s(bus->cfg_res, offset));			\
}

#define hv_cfg_write(x, s)						\
static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
    bus_size_t offset, uint##x##_t val)					\
{									\
	return (bus_write_##s(bus->cfg_res, offset, val));		\
}

/* Instantiate 8/16/32-bit variants (suffix is the byte width). */
hv_cfg_read(8, 1)
hv_cfg_read(16, 2)
hv_cfg_read(32, 4)

hv_cfg_write(8, 1)
hv_cfg_write(16, 2)
hv_cfg_write(32, 4)
1283 
/*
 * Read 'size' bytes of a child's PCI config space at offset 'where'
 * into *val.  IDs, subsystem IDs, ROM BAR, and interrupt line/pin are
 * emulated from the cached device descriptor; everything else goes
 * through the shared config-space MMIO window under config_lock.
 *
 * 'size' is expected to be 1, 2, or 4 (callers pass PCI access widths).
 */
static void
_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t *val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
	 */
	if (where + size <= PCIR_COMMAND) {
		/* Vendor/device ID from the cached descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCIR_REVID && where + size <=
		   PCIR_CACHELNSZ) {
		/* Revision ID and class code from the descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
		       PCIR_REVID, size);
	} else if (where >= PCIR_SUBVEND_0 && where + size <=
		   PCIR_BIOS) {
		/* Subsystem vendor/device ID from the descriptor. */
		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
		       PCIR_SUBVEND_0, size);
	} else if (where >= PCIR_BIOS && where + size <=
		   PCIR_CAP_PTR) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if ((where >= PCIR_INTLINE && where + size <=
		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		/* Real access via the shared MMIO window. */
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be read. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start reading.*/
		mb();

		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
			break;
		case 2:
			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
			break;
		default:
			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config read: it's unlikely to reach here. */
		memset(val, 0, size);
	}
}
1349 
/*
 * Write 'size' bytes of 'val' to a child's PCI config space at offset
 * 'where'.  Read-only emulated registers (SSIDs, ROM BAR) are silently
 * ignored; valid offsets go through the shared config-space MMIO
 * window under config_lock.
 *
 * 'size' is expected to be 1, 2, or 4 (callers pass PCI access widths).
 */
static void
_hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/* SSIDs and ROM BARs are read-only */
	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
		return;

	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be written. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start writing.*/
		wmb();

		/* Write to that function's config space. */
		switch (size) {
		case 1:
			hv_cfg_write_1(hbus, addr, (uint8_t)val);
			break;
		case 2:
			hv_cfg_write_2(hbus, addr, (uint16_t)val);
			break;
		default:
			hv_cfg_write_4(hbus, addr, (uint32_t)val);
			break;
		}

		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config write: it's unlikely to reach here. */
		return;
	}
}
1395 
1396 /*
1397  * The vPCI in some Hyper-V releases do not initialize the last 4
1398  * bit of BAR registers. This could result weird problems causing PCI
1399  * code fail to configure BAR correctly.
1400  *
1401  * Just write all 1's to those BARs whose probed values are not zero.
1402  * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1403  * correctly.
1404  */
1405 
1406 static void
1407 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1408 {
1409 	struct hv_pci_dev *hpdev;
1410 	int i;
1411 
1412 	mtx_lock(&hbus->device_list_lock);
1413 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
1414 		for (i = 0; i < 6; i++) {
1415 			/* Ignore empty bar */
1416 			if (hpdev->probed_bar[i] == 0)
1417 				continue;
1418 
1419 			uint32_t bar_val = 0;
1420 
1421 			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1422 			    4, &bar_val);
1423 
1424 			if (hpdev->probed_bar[i] != bar_val) {
1425 				if (bootverbose)
1426 					printf("vmbus_pcib: initialize bar %d "
1427 					    "by writing all 1s\n", i);
1428 
1429 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1430 				    4, 0xffffffff);
1431 
1432 				/* Now write the original value back */
1433 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1434 				    4, bar_val);
1435 			}
1436 		}
1437 	}
1438 	mtx_unlock(&hbus->device_list_lock);
1439 }
1440 
/*
 * Task callback: mark the bus as detaching.  Runs in the channel's
 * task context so the flag is visible to subsequent callback work.
 */
static void
vmbus_pcib_set_detaching(void *arg, int pending __unused)
{
	struct hv_pcibus *hbus = arg;

	atomic_set_int(&hbus->detaching, 1);
}
1448 
/*
 * Quiesce the bus before teardown: set the detaching flag from within
 * the channel's task context, then drain any work already queued.
 */
static void
vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
{
	struct task task;

	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);

	/*
	 * Make sure the channel callback won't push any possible new
	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
	 */
	vmbus_chan_run_task(hbus->sc->chan, &task);

	/* Wait for already-queued tasks to finish. */
	taskqueue_drain_all(hbus->sc->taskq);
}
1464 
1465 
1466 /*
1467  * Standard probe entry point.
1468  *
1469  */
1470 static int
1471 vmbus_pcib_probe(device_t dev)
1472 {
1473 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1474 	    &g_pass_through_dev_type) == 0) {
1475 		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1476 		return (BUS_PROBE_DEFAULT);
1477 	}
1478 	return (ENXIO);
1479 }
1480 
1481 /*
1482  * Standard attach entry point.
1483  *
1484  */
1485 static int
1486 vmbus_pcib_attach(device_t dev)
1487 {
1488 	const int pci_ring_size = (4 * PAGE_SIZE);
1489 	const struct hyperv_guid *inst_guid;
1490 	struct vmbus_channel *channel;
1491 	struct vmbus_pcib_softc *sc;
1492 	struct hv_pcibus *hbus;
1493 	int rid = 0;
1494 	int ret;
1495 
1496 	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
1497 	hbus->pcib = dev;
1498 
1499 	channel = vmbus_get_channel(dev);
1500 	inst_guid = vmbus_chan_guid_inst(channel);
1501 	hbus->pci_domain = inst_guid->hv_guid[9] |
1502 			  (inst_guid->hv_guid[8] << 8);
1503 
1504 	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
1505 	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
1506 	TAILQ_INIT(&hbus->children);
1507 	TAILQ_INIT(&hbus->dr_list);
1508 
1509 	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
1510 	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
1511 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1512 
1513 	if (!hbus->cfg_res) {
1514 		device_printf(dev, "failed to get resource for cfg window\n");
1515 		ret = ENXIO;
1516 		goto free_bus;
1517 	}
1518 
1519 	sc = device_get_softc(dev);
1520 	sc->chan = channel;
1521 	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1522 	sc->hbus = hbus;
1523 
1524 	/*
1525 	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
1526 	 * messages. NB: we can't handle the messages in the channel callback
1527 	 * directly, because the message handlers need to send new messages
1528 	 * to the host and waits for the host's completion messages, which
1529 	 * must also be handled by the channel callback.
1530 	 */
1531 	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
1532 	    taskqueue_thread_enqueue, &sc->taskq);
1533 	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
1534 
1535 	hbus->sc = sc;
1536 
1537 	init_completion(&hbus->query_completion);
1538 	hbus->query_comp = &hbus->query_completion;
1539 
1540 	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
1541 		NULL, 0, vmbus_pcib_on_channel_callback, sc);
1542 	if (ret)
1543 		goto free_res;
1544 
1545 	ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions,
1546 	    ARRAY_SIZE(pci_protocol_versions));
1547 	if (ret)
1548 		goto vmbus_close;
1549 
1550 	ret = hv_pci_query_relations(hbus);
1551 	if (!ret)
1552 		ret = wait_for_response(hbus, hbus->query_comp);
1553 
1554 	if (ret)
1555 		goto vmbus_close;
1556 
1557 	ret = hv_pci_enter_d0(hbus);
1558 	if (ret)
1559 		goto vmbus_close;
1560 
1561 	ret = hv_send_resources_allocated(hbus);
1562 	if (ret)
1563 		goto vmbus_close;
1564 
1565 	vmbus_pcib_prepopulate_bars(hbus);
1566 
1567 	hbus->pci_bus = device_add_child(dev, "pci", DEVICE_UNIT_ANY);
1568 	if (!hbus->pci_bus) {
1569 		device_printf(dev, "failed to create pci bus\n");
1570 		ret = ENXIO;
1571 		goto vmbus_close;
1572 	}
1573 
1574 	bus_generic_attach(dev);
1575 
1576 	hbus->state = hv_pcibus_installed;
1577 
1578 	return (0);
1579 
1580 vmbus_close:
1581 	vmbus_pcib_pre_detach(hbus);
1582 	vmbus_chan_close(sc->chan);
1583 free_res:
1584 	taskqueue_free(sc->taskq);
1585 	free_completion(&hbus->query_completion);
1586 	free(sc->rx_buf, M_DEVBUF);
1587 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1588 free_bus:
1589 	mtx_destroy(&hbus->device_list_lock);
1590 	mtx_destroy(&hbus->config_lock);
1591 	free(hbus, M_DEVBUF);
1592 	return (ret);
1593 }
1594 
1595 /*
1596  * Standard detach entry point
1597  */
1598 static int
1599 vmbus_pcib_detach(device_t dev)
1600 {
1601 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1602 	struct hv_pcibus *hbus = sc->hbus;
1603 	struct pci_message teardown_packet;
1604 	struct pci_bus_relations relations;
1605 	int ret;
1606 
1607 	vmbus_pcib_pre_detach(hbus);
1608 
1609 	if (hbus->state == hv_pcibus_installed)
1610 		bus_generic_detach(dev);
1611 
1612 	/* Delete any children which might still exist. */
1613 	memset(&relations, 0, sizeof(relations));
1614 	hv_pci_devices_present(hbus, &relations);
1615 
1616 	ret = hv_send_resources_released(hbus);
1617 	if (ret)
1618 		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1619 
1620 	teardown_packet.type = PCI_BUS_D0EXIT;
1621 	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1622 	    &teardown_packet, sizeof(struct pci_message), 0);
1623 	if (ret)
1624 		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1625 
1626 	taskqueue_drain_all(hbus->sc->taskq);
1627 	vmbus_chan_close(sc->chan);
1628 	taskqueue_free(sc->taskq);
1629 
1630 	free_completion(&hbus->query_completion);
1631 	free(sc->rx_buf, M_DEVBUF);
1632 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1633 
1634 	mtx_destroy(&hbus->device_list_lock);
1635 	mtx_destroy(&hbus->config_lock);
1636 	free(hbus, M_DEVBUF);
1637 
1638 	return (0);
1639 }
1640 
1641 static int
1642 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1643 {
1644 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1645 
1646 	switch (which) {
1647 	case PCIB_IVAR_DOMAIN:
1648 		*val = sc->hbus->pci_domain;
1649 		return (0);
1650 
1651 	case PCIB_IVAR_BUS:
1652 		/* There is only bus 0. */
1653 		*val = 0;
1654 		return (0);
1655 	}
1656 	return (ENOENT);
1657 }
1658 
/* No instance variables of this bridge are writable. */
static int
vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
{
	return (ENOENT);
}
1664 
/*
 * Allocate a resource for a child device.  Bus numbers come from the
 * PCI domain; port I/O is unsupported; memory requests for a 32-bit
 * BAR are clamped to the 32-bit address space before being passed up.
 */
static struct resource *
vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	unsigned int bar_no;
	struct hv_pci_dev *hpdev;
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct resource *res;
	unsigned int devfn;

	if (type == PCI_RES_BUS)
		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
		    start, end, count, flags));

	/* Devices with port I/O BAR are not supported. */
	if (type == SYS_RES_IOPORT)
		return (NULL);

	if (type == SYS_RES_MEMORY) {
		devfn = PCI_DEVFN(pci_get_slot(child),
		    pci_get_function(child));
		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
		if (!hpdev)
			return (NULL);

		bar_no = PCI_RID2BAR(*rid);
		if (bar_no >= MAX_NUM_BARS)
			return (NULL);

		/* Make sure a 32-bit BAR gets a 32-bit address */
		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
			end = ulmin(end, 0xFFFFFFFF);
	}

	res = bus_generic_alloc_resource(dev, child, type, rid,
		start, end, count, flags);
	/*
	 * If this is a request for a specific range, assume it is
	 * correct and pass it up to the parent.
	 *
	 * NOTE(review): this retry repeats the exact same call with the
	 * same arguments as above — it can only succeed if the first
	 * attempt's failure was transient.  Confirm whether a different
	 * parent path or flags were intended here.
	 */
	if (res == NULL && start + count - 1 == end)
		res = bus_generic_alloc_resource(dev, child, type, rid,
		    start, end, count, flags);
	if (res == NULL)
		device_printf(dev, "vmbus_pcib_alloc_resource failed\n");

	return (res);
}
1713 
1714 static int
1715 vmbus_pcib_adjust_resource(device_t dev, device_t child,
1716     struct resource *r, rman_res_t start, rman_res_t end)
1717 {
1718 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1719 
1720 	if (rman_get_type(r) == PCI_RES_BUS)
1721 		return (pci_domain_adjust_bus(sc->hbus->pci_domain, child, r,
1722 		    start, end));
1723 	return (bus_generic_adjust_resource(dev, child, r, start, end));
1724 }
1725 
1726 static int
1727 vmbus_pcib_release_resource(device_t dev, device_t child, struct resource *r)
1728 {
1729 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1730 
1731 	switch (rman_get_type(r)) {
1732 	case PCI_RES_BUS:
1733 		return (pci_domain_release_bus(sc->hbus->pci_domain, child, r));
1734 	case SYS_RES_IOPORT:
1735 		return (EINVAL);
1736 	default:
1737 		return (bus_generic_release_resource(dev, child, r));
1738 	}
1739 }
1740 
1741 static int
1742 vmbus_pcib_activate_resource(device_t dev, device_t child, struct resource *r)
1743 {
1744 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1745 
1746 	if (rman_get_type(r) == PCI_RES_BUS)
1747 		return (pci_domain_activate_bus(sc->hbus->pci_domain, child,
1748 		    r));
1749 	return (bus_generic_activate_resource(dev, child, r));
1750 }
1751 
1752 static int
1753 vmbus_pcib_deactivate_resource(device_t dev, device_t child, struct resource *r)
1754 {
1755 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1756 
1757 	if (rman_get_type(r) == PCI_RES_BUS)
1758 		return (pci_domain_deactivate_bus(sc->hbus->pci_domain, child,
1759 		    r));
1760 	return (bus_generic_deactivate_resource(dev, child, r));
1761 }
1762 
/* Forward CPU-set queries straight to our own bus layer. */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	return (bus_get_cpus(pcib, op, setsize, cpuset));
}
1769 
1770 static uint32_t
1771 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1772     u_int reg, int bytes)
1773 {
1774 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1775 	struct hv_pci_dev *hpdev;
1776 	unsigned int devfn = PCI_DEVFN(slot, func);
1777 	uint32_t data = 0;
1778 
1779 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1780 
1781 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1782 	if (!hpdev)
1783 		return (~0);
1784 
1785 	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
1786 
1787 	return (data);
1788 }
1789 
1790 static void
1791 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1792     u_int reg, uint32_t data, int bytes)
1793 {
1794 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1795 	struct hv_pci_dev *hpdev;
1796 	unsigned int devfn = PCI_DEVFN(slot, func);
1797 
1798 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1799 
1800 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1801 	if (!hpdev)
1802 		return;
1803 
1804 	_hv_pcifront_write_config(hpdev, reg, bytes, data);
1805 }
1806 
/* INTx routing is unsupported: Hyper-V vPCI is MSI/MSI-X only. */
static int
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
{
	/* We only support MSI/MSI-X and don't support INTx interrupt. */
	return (PCI_INVALID_IRQ);
}
1813 
/*
 * Allocate MSI vectors: x86 delegates to the parent pcib, arm64 goes
 * through INTRNG using the ACPI MSI cross-reference.
 */
static int
vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
    int maxcount, int *irqs)
{
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
	    irqs));
#endif
#if defined(__aarch64__)
	return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount,
	    irqs));
#endif
}
1827 
/* Release MSI vectors; mirror of vmbus_pcib_alloc_msi(). */
static int
vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
#endif
#if defined(__aarch64__)
	return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs));
#endif
}
1838 
1839 static int
1840 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1841 {
1842 #if defined(__aarch64__)
1843 	int ret;
1844 #if defined(INTRNG)
1845 	ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq);
1846 	return ret;
1847 #else
1848     return (ENXIO);
1849 #endif
1850 #else
1851 	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1852 #endif /* __aarch64__ */
1853 }
1854 
1855 static int
1856 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
1857 {
1858 #if defined(__aarch64__)
1859 	return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq));
1860 #else
1861 	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
1862 #endif /* __aarch64__ */
1863 }
1864 
#if defined(__aarch64__)
/* arm64: address/data come from intr_map_msi(); destination unused. */
#define	MSI_INTEL_ADDR_DEST	0x00000000
#define	MSI_INTEL_DATA_DELFIXED 0x0
#endif
#if defined(__amd64__) || defined(__i386__)
/* x86: APIC destination-ID mask and fixed-delivery-mode encodings. */
#define MSI_INTEL_ADDR_DEST 0x000ff000
#define MSI_INTEL_DATA_INTVEC   IOART_INTVEC    /* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
#endif
1874 
1875 static int
1876 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
1877     uint64_t *addr, uint32_t *data)
1878 {
1879 	unsigned int devfn;
1880 	struct hv_pci_dev *hpdev;
1881 
1882 	uint64_t v_addr;
1883 	uint32_t v_data;
1884 	struct hv_irq_desc *hid, *tmp_hid;
1885 	unsigned int cpu, vcpu_id;
1886 	unsigned int vector;
1887 
1888 	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
1889 	struct compose_comp_ctxt comp;
1890 	struct {
1891 		struct pci_packet pkt;
1892 		union {
1893 			struct pci_create_interrupt v1;
1894 			struct pci_create_interrupt3 v3;
1895 		}int_pkts;
1896 	} ctxt;
1897 	int ret;
1898 	uint32_t size;
1899 
1900 	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
1901 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1902 	if (!hpdev)
1903 		return (ENOENT);
1904 #if defined(__aarch64__)
1905 	ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq,
1906 	    &v_addr, &v_data);
1907 #else
1908 	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
1909             &v_addr, &v_data);
1910 #endif
1911 	if (ret)
1912 		return (ret);
1913 
1914 	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
1915 		if (hid->irq == irq) {
1916 			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
1917 			hv_int_desc_free(hpdev, hid);
1918 			break;
1919 		}
1920 	}
1921 
1922 #if defined(__aarch64__)
1923 	cpu = 0;
1924 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1925 	vector = v_data;
1926 #else
1927 	cpu = apic_cpuid((v_addr & MSI_INTEL_ADDR_DEST) >> 12);
1928 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1929 	vector = v_data & MSI_INTEL_DATA_INTVEC;
1930 #endif
1931 
1932 	if (hpdev->hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4 &&
1933 	    vcpu_id > 63) {
1934 		/* We only support vcpu_id < 64 before vPCI version 1.4 */
1935 		device_printf(pcib,
1936 		    "Error: "
1937 		    "vcpu_id %u overflowed on PCI VMBus version 0x%x\n",
1938 		    vcpu_id, hpdev->hbus->protocol_version);
1939 		return (ENODEV);
1940 	}
1941 
1942 	init_completion(&comp.comp_pkt.host_event);
1943 
1944 	memset(&ctxt, 0, sizeof(ctxt));
1945 	ctxt.pkt.completion_func = hv_pci_compose_compl;
1946 	ctxt.pkt.compl_ctxt = &comp;
1947 	switch (hpdev->hbus->protocol_version) {
1948 	case PCI_PROTOCOL_VERSION_1_1:
1949 		ctxt.int_pkts.v1.message_type.type =
1950 		    PCI_CREATE_INTERRUPT_MESSAGE;
1951 		ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val;
1952 		ctxt.int_pkts.v1.int_desc.vector = vector;
1953 		ctxt.int_pkts.v1.int_desc.vector_count = 1;
1954 		ctxt.int_pkts.v1.int_desc.delivery_mode =
1955 		    MSI_INTEL_DATA_DELFIXED;
1956 		ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id;
1957 		size = sizeof(ctxt.int_pkts.v1);
1958 		break;
1959 
1960 	case PCI_PROTOCOL_VERSION_1_4:
1961 		ctxt.int_pkts.v3.message_type.type =
1962 		    PCI_CREATE_INTERRUPT_MESSAGE3;
1963 		ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val;
1964 		ctxt.int_pkts.v3.int_desc.vector = vector;
1965 		ctxt.int_pkts.v3.int_desc.vector_count = 1;
1966 		ctxt.int_pkts.v3.int_desc.reserved = 0;
1967 		ctxt.int_pkts.v3.int_desc.delivery_mode =
1968 		    MSI_INTEL_DATA_DELFIXED;
1969 		ctxt.int_pkts.v3.int_desc.processor_count = 1;
1970 		ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id;
1971 		size = sizeof(ctxt.int_pkts.v3);
1972 		break;
1973 	}
1974 	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
1975 	    VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size,
1976 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1977 	if (ret) {
1978 		free_completion(&comp.comp_pkt.host_event);
1979 		return (ret);
1980 	}
1981 
1982 	wait_for_completion(&comp.comp_pkt.host_event);
1983 	free_completion(&comp.comp_pkt.host_event);
1984 
1985 	if (comp.comp_pkt.completion_status < 0) {
1986 		device_printf(pcib,
1987 		    "vmbus_pcib_map_msi completion_status %d\n",
1988 		    comp.comp_pkt.completion_status);
1989 		return (EPROTO);
1990 	}
1991 
1992 	*addr = comp.int_desc.address;
1993 	*data = comp.int_desc.data;
1994 
1995 	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
1996 	hid->irq = irq;
1997 	hid->desc = comp.int_desc;
1998 	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
1999 
2000 	return (0);
2001 }
2002 
/* Newbus method dispatch table for the Hyper-V pass-through bridge. */
static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         vmbus_pcib_probe),
	DEVMETHOD(device_attach,        vmbus_pcib_attach),
	DEVMETHOD(device_detach,        vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_adjust_resource,		vmbus_pcib_adjust_resource),
	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource,   	vmbus_pcib_activate_resource),
	DEVMETHOD(bus_deactivate_resource, 	vmbus_pcib_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
	DEVMETHOD(pcib_request_feature,		pcib_request_feature_allow),

	DEVMETHOD_END
};
2038 
/* Register the driver under the "pcib" class, attached to vmbus. */
DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
		sizeof(struct vmbus_pcib_softc));
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
2044