xref: /freebsd/sys/dev/hyperv/pcib/vmbus_pcib.c (revision 370e009188ba90c3290b1479aa06ec98b66e140a)
1 /*-
2  * Copyright (c) 2016-2017 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #ifdef NEW_PCIB
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/types.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/queue.h>
40 #include <sys/lock.h>
41 #include <sys/sx.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 #include <sys/mutex.h>
47 #include <sys/errno.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_kern.h>
52 #include <vm/pmap.h>
53 
54 #if defined(__aarch64__)
55 #include <arm64/include/intr.h>
56 #endif
57 #include <machine/atomic.h>
58 #include <machine/bus.h>
59 #include <machine/frame.h>
60 #include <machine/pci_cfgreg.h>
61 #include <machine/resource.h>
62 
63 #include <sys/pciio.h>
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66 #include <dev/pci/pci_private.h>
67 #include <dev/pci/pcib_private.h>
68 #include "pcib_if.h"
69 #if defined(__i386__) || defined(__amd64__)
70 #include <machine/intr_machdep.h>
71 #include <x86/apicreg.h>
72 #endif
73 #if defined(__aarch64__)
74 #include <contrib/dev/acpica/include/acpi.h>
75 #include <contrib/dev/acpica/include/accommon.h>
76 #include <dev/acpica/acpivar.h>
77 #include <dev/acpica/acpi_pcibvar.h>
78 #endif
79 #include <dev/hyperv/include/hyperv.h>
80 #include <dev/hyperv/include/hyperv_busdma.h>
81 #include <dev/hyperv/include/vmbus_xact.h>
82 #include <dev/hyperv/vmbus/vmbus_reg.h>
83 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
84 
85 #include "vmbus_if.h"
86 
/*
 * Minimal counting completion.  complete() increments "done" and wakes
 * sleepers; the wait_for_completion*() helpers consume one count each.
 */
struct completion {
	unsigned int done;	/* posted-but-unconsumed completions */
	struct mtx lock;	/* protects "done"; interlock for mtx_sleep() */
};
91 
92 static void
93 init_completion(struct completion *c)
94 {
95 	memset(c, 0, sizeof(*c));
96 	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
97 	c->done = 0;
98 }
99 static void
100 reinit_completion(struct completion *c)
101 {
102 	c->done = 0;
103 }
104 static void
105 free_completion(struct completion *c)
106 {
107 	mtx_destroy(&c->lock);
108 }
109 
110 static void
111 complete(struct completion *c)
112 {
113 	mtx_lock(&c->lock);
114 	c->done++;
115 	mtx_unlock(&c->lock);
116 	wakeup(c);
117 }
118 
119 static void
120 wait_for_completion(struct completion *c)
121 {
122 	mtx_lock(&c->lock);
123 	while (c->done == 0)
124 		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
125 	c->done--;
126 	mtx_unlock(&c->lock);
127 }
128 
129 /*
130  * Return: 0 if completed, a non-zero value if timed out.
131  */
132 static int
133 wait_for_completion_timeout(struct completion *c, int timeout)
134 {
135 	int ret;
136 
137 	mtx_lock(&c->lock);
138 
139 	if (c->done == 0)
140 		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);
141 
142 	if (c->done > 0) {
143 		c->done--;
144 		ret = 0;
145 	} else {
146 		ret = 1;
147 	}
148 
149 	mtx_unlock(&c->lock);
150 
151 	return (ret);
152 }
153 
/* Element count of a true array (not of a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

/* Pack major/minor into one 32-bit value: major in bits 31..16. */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

/* Protocol versions this driver knows how to speak. */
enum pci_protocol_version_t {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),
};

/* Known versions, newest first. */
static enum pci_protocol_version_t pci_protocol_versions[] = {
	[0] = PCI_PROTOCOL_VERSION_1_4,
	[1] = PCI_PROTOCOL_VERSION_1_1,
};
166 
/*
 * Geometry of the config-space MMIO window: total length, offset of the
 * config page inside it, and the resulting page size (0x1000).  The code
 * using these constants lies outside this part of the file.
 */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
170 
/*
 * Message Types
 *
 * Every value is an offset from PCI_MESSAGE_BASE.  Offsets 2 and 3 are
 * not defined here.
 */

enum pci_message_type {
	/*
	 * Version 1.1
	 *
	 * NOTE(review): the entries with a "2"/"3" suffix near the end of
	 * the list appear to be later revisions of the same messages; which
	 * protocol version introduced each one is not stated in this file.
	 */
	PCI_MESSAGE_BASE                = 0x42490000,
	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2         = PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x17,
	PCI_DELETE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x18, /* unused */
	PCI_BUS_RELATIONS2              = PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
	/* One past the highest message value defined above. */
	PCI_MESSAGE_MAXIMUM
};
208 
/*
 * Completion status that hv_pci_protocol_negotiation() treats as "this
 * protocol version was refused, try the next one".
 */
#define STATUS_REVISION_MISMATCH 0xC0000059
210 
/*
 * Structures defining the virtual PCI Express protocol.
 */

/*
 * A protocol version, viewed either as one 32-bit value or as its two
 * 16-bit halves.  With minor_version first, the layout agrees with
 * PCI_MAKE_VERSION() (major in the upper 16 bits) on a little-endian
 * machine.
 */
union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;
222 
/*
 * This representation is the one used in Windows, which is
 * what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 *
 * devfn_to_wslot() and wslot_to_devfn() convert between this encoding
 * and the devfn form built by PCI_DEVFN().
 */
union win_slot_encoding {
	struct {
		uint32_t	slot:5;		/* PCI slot (device) number */
		uint32_t	func:3;		/* PCI function number */
		uint32_t	reserved:24;
	} bits;
	uint32_t val;				/* the whole encoding as one word */
} __packed;
236 
/*
 * Per-function descriptor carried in a PCI_BUS_RELATIONS message
 * (see struct pci_bus_relations).
 */
struct pci_func_desc {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
} __packed;

/*
 * Per-function descriptor carried in a PCI_BUS_RELATIONS2 message
 * (see struct pci_bus_relations2): the same fields as struct
 * pci_func_desc followed by flags, virtual_numa_node and a reserved
 * field.
 */
struct pci_func_desc2 {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union		win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
	uint32_t	flags;
	uint16_t	virtual_numa_node;
	uint16_t	reserved;
} __packed;
263 
264 
/* Interrupt description embedded in struct pci_create_interrupt. */
struct hv_msi_desc {
	uint8_t		vector;
	uint8_t		delivery_mode;
	uint16_t	vector_count;
	uint32_t	reserved;
	uint64_t	cpu_mask;
} __packed;

/*
 * Interrupt description embedded in struct pci_create_interrupt3.  It
 * has a 32-bit vector and an explicit processor array in place of the
 * cpu_mask used by struct hv_msi_desc.
 */
struct hv_msi_desc3 {
	uint32_t	vector;
	uint8_t		delivery_mode;
	uint8_t		reserved;
	uint16_t	vector_count;
	uint16_t	processor_count;
	uint16_t	processor_array[32];
} __packed;

/*
 * Address/data pair returned by the host in struct
 * pci_create_int_response and handed back in struct pci_delete_interrupt.
 */
struct tran_int_desc {
	uint16_t	reserved;
	uint16_t	vector_count;
	uint32_t	data;
	uint64_t	address;
} __packed;
288 
/* Common header of every outgoing message: an enum pci_message_type value. */
struct pci_message {
	uint32_t type;
} __packed;

/* Outgoing message that addresses a single child function. */
struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

/* In-band message received from the host: channel header, then the type. */
struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

/* Completion packet received from the host: channel header, then status. */
struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * Bookkeeping for one request.  Senders pass a pointer to this structure
 * as the transaction id; for a completion packet the channel callback
 * casts the id back and calls completion_func(compl_ctxt, response,
 * size).  message[] marks where the request body starts: callers declare
 * a struct pci_packet immediately followed by a byte buffer of the
 * request's size.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};
315 
/*
 * Specific message types supporting the PCI protocol.
 */

/* PCI_QUERY_PROTOCOL_VERSION request: proposes one protocol version. */
struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t reservedz:31;
} __packed;

/*
 * PCI_BUS_D0ENTRY request.  hv_pci_enter_d0() fills mmio_base with the
 * start of the config-space resource (hbus->cfg_res).
 */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;
} __packed;

/* PCI_BUS_RELATIONS message: device_count descriptors follow the header. */
struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

/* PCI_BUS_RELATIONS2 message: as above, with pci_func_desc2 entries. */
struct pci_bus_relations2 {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc2 func[0];
} __packed;
343 
/* Number of BAR slots tracked per function. */
#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)

/*
 * Response to PCI_QUERY_RESOURCE_REQUIREMENTS; q_resource_requirements()
 * copies probed_bar[] into the child's struct hv_pci_dev.
 */
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status; /* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

/* "Resources assigned" message body (first layout). */
struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

/* "Resources assigned" message body (second layout, larger reserved area). */
struct pci_resources_assigned2 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][6];   /* not used here */
	uint32_t msi_descriptor_count;
	uint8_t reserved[70];
} __packed;
366 
/* Create-interrupt request using the struct hv_msi_desc description. */
struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

/* Create-interrupt request using the struct hv_msi_desc3 description. */
struct pci_create_interrupt3 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc3 int_desc;
} __packed;

/*
 * Completion of a create-interrupt request; hv_pci_compose_compl() copies
 * int_desc out of it.
 */
struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

/* PCI_DELETE_INTERRUPT_MESSAGE request, built by hv_int_desc_free(). */
struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

/* Incoming message that names one child function (used for PCI_EJECT). */
struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

/* PCI_EJECTION_COMPLETE message, sent by hv_eject_device_work(). */
struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;
401 
/*
 * Driver specific state.
 */

/*
 * Bus life-cycle state.  pci_devices_present_work() only rescans the bus
 * once the state is hv_pcibus_installed.
 */
enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-bridge state shared by the channel callback and the work tasks. */
struct hv_pcibus {
	device_t pcib;		/* device used for device_printf() */
	device_t pci_bus;	/* bus whose children are deleted/rescanned */
	struct vmbus_pcib_softc *sc;

	/* Low 16 bits of the serial number of the first child added. */
	uint16_t pci_domain;

	enum hv_pcibus_state state;

	/* Its start address is sent as mmio_base in PCI_BUS_D0ENTRY. */
	struct resource *cfg_res;

	/*
	 * If query_comp is non-NULL, pci_devices_present_work() clears it
	 * and completes the completion it pointed to.
	 */
	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock;    /* Protect lists below */
	uint32_t protocol_version;	/* set by hv_pci_protocol_negotiation() */
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	/* When set, new relations (non-empty) and eject requests are ignored. */
	volatile int detaching;
};
432 
/*
 * Driver-internal function descriptor.  It has the fields of struct
 * pci_func_desc2 except the trailing "reserved" one, so its size differs
 * from both wire descriptors (pci_func_desc and pci_func_desc2).
 */
struct hv_pcidev_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
	uint32_t flags;
	uint16_t virtual_numa_node;
} __packed;

/* One child function known to the bridge; linked on hv_pcibus.children. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct hv_pcidev_desc desc;

	/*
	 * Scratch flag for pci_devices_present_work(): set on every child,
	 * cleared on those the host still reports, the rest are deleted.
	 */
	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;	/* runs hv_eject_device_work() */

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};
465 
/*
 * Tracks "Device Relations" messages from the host, which must be
 * processed in order.
 *
 * One hv_dr_work is allocated per message and freed by
 * pci_devices_present_work() when the task runs.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;
};

/*
 * Snapshot of one relations message, queued on hv_pcibus.dr_list.
 * func[] holds device_count descriptors.
 */
struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;
	uint32_t device_count;
	struct hv_pcidev_desc func[0];
};

/*
 * One interrupt of a child, linked on hv_pci_dev.irq_desc_list and
 * released through hv_int_desc_free().
 */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;
	struct tran_int_desc desc;
	int irq;
};
486 
/*
 * devfn packing: slot in bits 7..3, function in bits 2..0.
 * PCI_SLOT() and PCI_FUNC() extract the two parts again.
 */
#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)         ((devfn) & 0x07)
490 
491 static uint32_t
492 devfn_to_wslot(unsigned int devfn)
493 {
494 	union win_slot_encoding wslot;
495 
496 	wslot.val = 0;
497 	wslot.bits.slot = PCI_SLOT(devfn);
498 	wslot.bits.func = PCI_FUNC(devfn);
499 
500 	return (wslot.val);
501 }
502 
503 static unsigned int
504 wslot_to_devfn(uint32_t wslot)
505 {
506 	union win_slot_encoding encoding;
507 	unsigned int slot;
508 	unsigned int func;
509 
510 	encoding.val = wslot;
511 
512 	slot = encoding.bits.slot;
513 	func = encoding.bits.func;
514 
515 	return (PCI_DEVFN(slot, func));
516 }
517 
/* Per-device softc of the bridge. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;	/* channel every request is sent on */
	void *rx_buf;			/* initial receive buffer of the channel callback */

	struct taskqueue	*taskq;	/* runs pci_devices_present_work() */

	struct hv_pcibus	*hbus;
};
526 
/*
 * {44C4F61D-4444-4400-9D52-802E27EDE19F}
 *
 * The first three groups of the GUID are stored byte-reversed
 * (44C4F61D -> 1D F6 C4 44); the remaining bytes are in printed order.
 */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};
532 
/* Context for hv_pci_generic_compl(): the event plus the host's status. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Context for q_resource_requirements(): the event plus the target child. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};

/* Context for hv_pci_compose_compl(): status plus the returned descriptor. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};
547 
548 /*
549  * It is possible the device is revoked during initialization.
550  * Check if this happens during wait.
551  * Return: 0 if response arrived, ENODEV if device revoked.
552  */
553 static int
554 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
555 {
556 	do {
557 		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
558 			device_printf(hbus->pcib,
559 			    "The device is revoked.\n");
560 			return (ENODEV);
561 		}
562 	} while (wait_for_completion_timeout(c, hz /10) != 0);
563 
564 	return 0;
565 }
566 
567 static void
568 hv_pci_generic_compl(void *context, struct pci_response *resp,
569     int resp_packet_size)
570 {
571 	struct hv_pci_compl *comp_pkt = context;
572 
573 	if (resp_packet_size >= sizeof(struct pci_response))
574 		comp_pkt->completion_status = resp->status;
575 	else
576 		comp_pkt->completion_status = -1;
577 
578 	complete(&comp_pkt->host_event);
579 }
580 
581 static void
582 q_resource_requirements(void *context, struct pci_response *resp,
583     int resp_packet_size)
584 {
585 	struct q_res_req_compl *completion = context;
586 	struct pci_q_res_req_response *q_res_req =
587 	    (struct pci_q_res_req_response *)resp;
588 	int i;
589 
590 	if (resp->status < 0) {
591 		printf("vmbus_pcib: failed to query resource requirements\n");
592 	} else {
593 		for (i = 0; i < MAX_NUM_BARS; i++)
594 			completion->hpdev->probed_bar[i] =
595 			    q_res_req->probed_bar[i];
596 	}
597 
598 	complete(&completion->host_event);
599 }
600 
601 static void
602 hv_pci_compose_compl(void *context, struct pci_response *resp,
603     int resp_packet_size)
604 {
605 	struct compose_comp_ctxt *comp_pkt = context;
606 	struct pci_create_int_response *int_resp =
607 	    (struct pci_create_int_response *)resp;
608 
609 	comp_pkt->comp_pkt.completion_status = resp->status;
610 	comp_pkt->int_desc = int_resp->int_desc;
611 	complete(&comp_pkt->comp_pkt.host_event);
612 }
613 
614 static void
615 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
616 {
617 	struct pci_delete_interrupt *int_pkt;
618 	struct {
619 		struct pci_packet pkt;
620 		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
621 	} ctxt;
622 
623 	memset(&ctxt, 0, sizeof(ctxt));
624 	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
625 	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
626 	int_pkt->wslot.val = hpdev->desc.wslot.val;
627 	int_pkt->int_desc = hid->desc;
628 
629 	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
630 	    int_pkt, sizeof(*int_pkt), 0);
631 
632 	free(hid, M_DEVBUF);
633 }
634 
635 static void
636 hv_pci_delete_device(struct hv_pci_dev *hpdev)
637 {
638 	struct hv_pcibus *hbus = hpdev->hbus;
639 	struct hv_irq_desc *hid, *tmp_hid;
640 	device_t pci_dev;
641 	int devfn;
642 
643 	devfn = wslot_to_devfn(hpdev->desc.wslot.val);
644 
645 	bus_topo_lock();
646 
647 	pci_dev = pci_find_dbsf(hbus->pci_domain,
648 	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
649 	if (pci_dev)
650 		device_delete_child(hbus->pci_bus, pci_dev);
651 
652 	bus_topo_unlock();
653 
654 	mtx_lock(&hbus->device_list_lock);
655 	TAILQ_REMOVE(&hbus->children, hpdev, link);
656 	mtx_unlock(&hbus->device_list_lock);
657 
658 	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
659 		hv_int_desc_free(hpdev, hid);
660 
661 	free(hpdev, M_DEVBUF);
662 }
663 
664 static struct hv_pci_dev *
665 new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc)
666 {
667 	struct hv_pci_dev *hpdev;
668 	struct pci_child_message *res_req;
669 	struct q_res_req_compl comp_pkt;
670 	struct {
671 		struct pci_packet pkt;
672 		uint8_t buffer[sizeof(struct pci_child_message)];
673 	} ctxt;
674 	int ret;
675 
676 	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
677 	hpdev->hbus = hbus;
678 
679 	TAILQ_INIT(&hpdev->irq_desc_list);
680 
681 	init_completion(&comp_pkt.host_event);
682 	comp_pkt.hpdev = hpdev;
683 
684 	ctxt.pkt.compl_ctxt = &comp_pkt;
685 	ctxt.pkt.completion_func = q_resource_requirements;
686 
687 	res_req = (struct pci_child_message *)&ctxt.pkt.message;
688 	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
689 	res_req->wslot.val = desc->wslot.val;
690 
691 	ret = vmbus_chan_send(hbus->sc->chan,
692 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
693 	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
694 	if (ret)
695 		goto err;
696 
697 	if (wait_for_response(hbus, &comp_pkt.host_event))
698 		goto err;
699 
700 	free_completion(&comp_pkt.host_event);
701 
702 	hpdev->desc = *desc;
703 
704 	mtx_lock(&hbus->device_list_lock);
705 	if (TAILQ_EMPTY(&hbus->children))
706 		hbus->pci_domain = desc->ser & 0xFFFF;
707 	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
708 	mtx_unlock(&hbus->device_list_lock);
709 	return (hpdev);
710 err:
711 	free_completion(&comp_pkt.host_event);
712 	free(hpdev, M_DEVBUF);
713 	return (NULL);
714 }
715 
716 static int
717 pci_rescan(device_t dev)
718 {
719 	return (BUS_RESCAN(dev));
720 }
721 
722 static void
723 pci_devices_present_work(void *arg, int pending __unused)
724 {
725 	struct hv_dr_work *dr_wrk = arg;
726 	struct hv_dr_state *dr = NULL;
727 	struct hv_pcibus *hbus;
728 	uint32_t child_no;
729 	bool found;
730 	struct hv_pcidev_desc *new_desc;
731 	struct hv_pci_dev *hpdev, *tmp_hpdev;
732 	struct completion *query_comp;
733 	bool need_rescan = false;
734 
735 	hbus = dr_wrk->bus;
736 	free(dr_wrk, M_DEVBUF);
737 
738 	/* Pull this off the queue and process it if it was the last one. */
739 	mtx_lock(&hbus->device_list_lock);
740 	while (!TAILQ_EMPTY(&hbus->dr_list)) {
741 		dr = TAILQ_FIRST(&hbus->dr_list);
742 		TAILQ_REMOVE(&hbus->dr_list, dr, link);
743 
744 		/* Throw this away if the list still has stuff in it. */
745 		if (!TAILQ_EMPTY(&hbus->dr_list)) {
746 			free(dr, M_DEVBUF);
747 			continue;
748 		}
749 	}
750 	mtx_unlock(&hbus->device_list_lock);
751 
752 	if (!dr)
753 		return;
754 
755 	/* First, mark all existing children as reported missing. */
756 	mtx_lock(&hbus->device_list_lock);
757 	TAILQ_FOREACH(hpdev, &hbus->children, link)
758 		hpdev->reported_missing = true;
759 	mtx_unlock(&hbus->device_list_lock);
760 
761 	/* Next, add back any reported devices. */
762 	for (child_no = 0; child_no < dr->device_count; child_no++) {
763 		found = false;
764 		new_desc = &dr->func[child_no];
765 
766 		mtx_lock(&hbus->device_list_lock);
767 		TAILQ_FOREACH(hpdev, &hbus->children, link) {
768 			if ((hpdev->desc.wslot.val ==
769 			    new_desc->wslot.val) &&
770 			    (hpdev->desc.v_id == new_desc->v_id) &&
771 			    (hpdev->desc.d_id == new_desc->d_id) &&
772 			    (hpdev->desc.ser == new_desc->ser)) {
773 				hpdev->reported_missing = false;
774 				found = true;
775 				break;
776 			}
777 		}
778 		mtx_unlock(&hbus->device_list_lock);
779 
780 		if (!found) {
781 			if (!need_rescan)
782 				need_rescan = true;
783 
784 			hpdev = new_pcichild_device(hbus, new_desc);
785 			if (!hpdev)
786 				printf("vmbus_pcib: failed to add a child\n");
787 		}
788 	}
789 
790 	/* Remove missing device(s), if any */
791 	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
792 		if (hpdev->reported_missing)
793 			hv_pci_delete_device(hpdev);
794 	}
795 
796 	/* Rescan the bus to find any new device, if necessary. */
797 	if (hbus->state == hv_pcibus_installed && need_rescan)
798 		pci_rescan(hbus->pci_bus);
799 
800 	/* Wake up hv_pci_query_relations(), if it's waiting. */
801 	query_comp = hbus->query_comp;
802 	if (query_comp) {
803 		hbus->query_comp = NULL;
804 		complete(query_comp);
805 	}
806 
807 	free(dr, M_DEVBUF);
808 }
809 
810 static struct hv_pci_dev *
811 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
812 {
813 	struct hv_pci_dev *hpdev, *ret = NULL;
814 
815 	mtx_lock(&hbus->device_list_lock);
816 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
817 		if (hpdev->desc.wslot.val == wslot) {
818 			ret = hpdev;
819 			break;
820 		}
821 	}
822 	mtx_unlock(&hbus->device_list_lock);
823 
824 	return (ret);
825 }
826 
827 static void
828 hv_pci_devices_present(struct hv_pcibus *hbus,
829     struct pci_bus_relations *relations)
830 {
831 	struct hv_dr_state *dr;
832 	struct hv_dr_work *dr_wrk;
833 	unsigned long dr_size;
834 
835 	if (hbus->detaching && relations->device_count > 0)
836 		return;
837 
838 	dr_size = offsetof(struct hv_dr_state, func) +
839 	    (sizeof(struct pci_func_desc) * relations->device_count);
840 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
841 
842 	dr->device_count = relations->device_count;
843 	if (dr->device_count != 0)
844 		memcpy(dr->func, relations->func,
845 		    sizeof(struct hv_pcidev_desc) * dr->device_count);
846 
847 	mtx_lock(&hbus->device_list_lock);
848 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
849 	mtx_unlock(&hbus->device_list_lock);
850 
851 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
852 	dr_wrk->bus = hbus;
853 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
854 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
855 }
856 
857 static void
858 hv_pci_devices_present2(struct hv_pcibus *hbus,
859     struct pci_bus_relations2 *relations)
860 {
861 	struct hv_dr_state *dr;
862 	struct hv_dr_work *dr_wrk;
863 	unsigned long dr_size;
864 
865 	if (hbus->detaching && relations->device_count > 0)
866 		return;
867 
868 	dr_size = offsetof(struct hv_dr_state, func) +
869 	    (sizeof(struct pci_func_desc2) * relations->device_count);
870 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
871 
872 	dr->device_count = relations->device_count;
873 	if (dr->device_count != 0)
874 		memcpy(dr->func, relations->func,
875 		    sizeof(struct pci_func_desc2) * dr->device_count);
876 
877 	mtx_lock(&hbus->device_list_lock);
878 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
879 	mtx_unlock(&hbus->device_list_lock);
880 
881 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
882 	dr_wrk->bus = hbus;
883 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
884 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
885 }
886 
887 static void
888 hv_eject_device_work(void *arg, int pending __unused)
889 {
890 	struct hv_pci_dev *hpdev = arg;
891 	union win_slot_encoding wslot = hpdev->desc.wslot;
892 	struct hv_pcibus *hbus = hpdev->hbus;
893 	struct pci_eject_response *eject_pkt;
894 	struct {
895 		struct pci_packet pkt;
896 		uint8_t buffer[sizeof(struct pci_eject_response)];
897 	} ctxt;
898 
899 	hv_pci_delete_device(hpdev);
900 
901 	memset(&ctxt, 0, sizeof(ctxt));
902 	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
903 	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
904 	eject_pkt->wslot.val = wslot.val;
905 	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
906 	    eject_pkt, sizeof(*eject_pkt), 0);
907 }
908 
909 static void
910 hv_pci_eject_device(struct hv_pci_dev *hpdev)
911 {
912 	struct hv_pcibus *hbus = hpdev->hbus;
913 	struct taskqueue *taskq;
914 
915 	if (hbus->detaching)
916 		return;
917 
918 	/*
919 	 * Push this task into the same taskqueue on which
920 	 * vmbus_pcib_attach() runs, so we're sure this task can't run
921 	 * concurrently with vmbus_pcib_attach().
922 	 */
923 	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
924 	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
925 	taskqueue_enqueue(taskq, &hpdev->eject_task);
926 }
927 
928 #define PCIB_PACKET_SIZE	0x100
929 
930 static void
931 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
932 {
933 	struct vmbus_pcib_softc *sc = arg;
934 	struct hv_pcibus *hbus = sc->hbus;
935 
936 	void *buffer;
937 	int bufferlen = PCIB_PACKET_SIZE;
938 
939 	struct pci_packet *comp_packet;
940 	struct pci_response *response;
941 	struct pci_incoming_message *new_msg;
942 	struct pci_bus_relations *bus_rel;
943 	struct pci_bus_relations2 *bus_rel2;
944 	struct pci_dev_incoming *dev_msg;
945 	struct hv_pci_dev *hpdev;
946 
947 	buffer = sc->rx_buf;
948 	do {
949 		struct vmbus_chanpkt_hdr *pkt = buffer;
950 		uint32_t bytes_rxed;
951 		int ret;
952 
953 		bytes_rxed = bufferlen;
954 		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
955 
956 		if (ret == ENOBUFS) {
957 			/* Handle large packet */
958 			if (bufferlen > PCIB_PACKET_SIZE) {
959 				free(buffer, M_DEVBUF);
960 				buffer = NULL;
961 			}
962 
963 			/* alloc new buffer */
964 			buffer =
965 			    malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
966 			bufferlen = bytes_rxed;
967 
968 			continue;
969 		}
970 
971 		if (ret != 0) {
972 			/* ignore EIO or EAGAIN */
973 			break;
974 		}
975 
976 		if (bytes_rxed <= sizeof(struct pci_response))
977 			continue;
978 
979 		switch (pkt->cph_type) {
980 		case VMBUS_CHANPKT_TYPE_COMP:
981 			comp_packet =
982 			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
983 			response = (struct pci_response *)pkt;
984 			comp_packet->completion_func(comp_packet->compl_ctxt,
985 			    response, bytes_rxed);
986 			break;
987 		case VMBUS_CHANPKT_TYPE_INBAND:
988 			new_msg = (struct pci_incoming_message *)buffer;
989 
990 			switch (new_msg->message_type.type) {
991 			case PCI_BUS_RELATIONS:
992 				bus_rel = (struct pci_bus_relations *)buffer;
993 
994 				if (bus_rel->device_count == 0)
995 					break;
996 
997 				if (bytes_rxed <
998 				    offsetof(struct pci_bus_relations, func) +
999 				        (sizeof(struct pci_func_desc) *
1000 				            (bus_rel->device_count)))
1001 					break;
1002 
1003 				hv_pci_devices_present(hbus, bus_rel);
1004 				break;
1005 
1006 			case PCI_BUS_RELATIONS2:
1007 				bus_rel2 = (struct pci_bus_relations2 *)buffer;
1008 
1009 				if (bus_rel2->device_count == 0)
1010 					break;
1011 
1012 				if (bytes_rxed <
1013 				    offsetof(struct pci_bus_relations2, func) +
1014 				    (sizeof(struct pci_func_desc2) *
1015 				    (bus_rel2->device_count)))
1016 					break;
1017 
1018 				hv_pci_devices_present2(hbus, bus_rel2);
1019 
1020 			case PCI_EJECT:
1021 				dev_msg = (struct pci_dev_incoming *)buffer;
1022 				hpdev = get_pcichild_wslot(hbus,
1023 				    dev_msg->wslot.val);
1024 
1025 				if (hpdev)
1026 					hv_pci_eject_device(hpdev);
1027 
1028 				break;
1029 			default:
1030 				printf("vmbus_pcib: Unknown msg type 0x%x\n",
1031 				    new_msg->message_type.type);
1032 				break;
1033 			}
1034 			break;
1035 		default:
1036 			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1037 			    pkt->cph_type);
1038 			break;
1039 		}
1040 	} while (1);
1041 
1042 	if (bufferlen > PCIB_PACKET_SIZE)
1043 		free(buffer, M_DEVBUF);
1044 }
1045 
1046 static int
1047 hv_pci_protocol_negotiation(struct hv_pcibus *hbus,
1048     enum pci_protocol_version_t version[],
1049     int num_version)
1050 {
1051 	struct pci_version_request *version_req;
1052 	struct hv_pci_compl comp_pkt;
1053 	struct {
1054 		struct pci_packet pkt;
1055 		uint8_t buffer[sizeof(struct pci_version_request)];
1056 	} ctxt;
1057 	int ret;
1058 	int i;
1059 
1060 	init_completion(&comp_pkt.host_event);
1061 
1062 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1063 	ctxt.pkt.compl_ctxt = &comp_pkt;
1064 	version_req = (struct pci_version_request *)&ctxt.pkt.message;
1065 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1066 
1067 	for(i=0; i< num_version; i++) {
1068 		version_req->protocol_version = version[i];
1069 		ret = vmbus_chan_send(hbus->sc->chan,
1070 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1071 		    version_req, sizeof(*version_req),
1072 		    (uint64_t)(uintptr_t)&ctxt.pkt);
1073 		if (!ret)
1074 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1075 
1076 		if (ret) {
1077 			device_printf(hbus->pcib,
1078 				"vmbus_pcib failed to request version: %d\n",
1079 				ret);
1080 			goto out;
1081 		}
1082 
1083 		if (comp_pkt.completion_status >= 0) {
1084 			hbus->protocol_version = version[i];
1085 			device_printf(hbus->pcib,
1086 				"PCI VMBus using version 0x%x\n",
1087 				hbus->protocol_version);
1088 			ret = 0;
1089 			goto out;
1090 		}
1091 
1092 		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
1093 			device_printf(hbus->pcib,
1094 				"vmbus_pcib version negotiation failed: %x\n",
1095 				comp_pkt.completion_status);
1096 			ret = EPROTO;
1097 			goto out;
1098 		}
1099 		reinit_completion(&comp_pkt.host_event);
1100 	}
1101 
1102 	device_printf(hbus->pcib,
1103 		"PCI pass-trhpugh VSP failed to find supported version\n");
1104 out:
1105 	free_completion(&comp_pkt.host_event);
1106 	return (ret);
1107 }
1108 
1109 /* Ask the host to send along the list of child devices */
1110 static int
1111 hv_pci_query_relations(struct hv_pcibus *hbus)
1112 {
1113 	struct pci_message message;
1114 	int ret;
1115 
1116 	message.type = PCI_QUERY_BUS_RELATIONS;
1117 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1118 	    &message, sizeof(message), 0);
1119 	return (ret);
1120 }
1121 
1122 static int
1123 hv_pci_enter_d0(struct hv_pcibus *hbus)
1124 {
1125 	struct pci_bus_d0_entry *d0_entry;
1126 	struct hv_pci_compl comp_pkt;
1127 	struct {
1128 		struct pci_packet pkt;
1129 		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1130 	} ctxt;
1131 	int ret;
1132 
1133 	/*
1134 	 * Tell the host that the bus is ready to use, and moved into the
1135 	 * powered-on state.  This includes telling the host which region
1136 	 * of memory-mapped I/O space has been chosen for configuration space
1137 	 * access.
1138 	 */
1139 	init_completion(&comp_pkt.host_event);
1140 
1141 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1142 	ctxt.pkt.compl_ctxt = &comp_pkt;
1143 
1144 	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1145 	memset(d0_entry, 0, sizeof(*d0_entry));
1146 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1147 	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1148 
1149 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1150 	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1151 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1152 	if (!ret)
1153 		ret = wait_for_response(hbus, &comp_pkt.host_event);
1154 
1155 	if (ret)
1156 		goto out;
1157 
1158 	if (comp_pkt.completion_status < 0) {
1159 		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1160 		ret = EPROTO;
1161 	} else {
1162 		ret = 0;
1163 	}
1164 
1165 out:
1166 	free_completion(&comp_pkt.host_event);
1167 	return (ret);
1168 }
1169 
1170 /*
1171  * It looks this is only needed by Windows VM, but let's send the message too
1172  * just to make the host happy.
1173  */
1174 static int
1175 hv_send_resources_allocated(struct hv_pcibus *hbus)
1176 {
1177 	struct pci_resources_assigned *res_assigned;
1178 	struct pci_resources_assigned2 *res_assigned2;
1179 	struct hv_pci_compl comp_pkt;
1180 	struct hv_pci_dev *hpdev;
1181 	struct pci_packet *pkt;
1182 	uint32_t wslot;
1183 	int ret = 0;
1184 	size_t size_res;
1185 
1186 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4)
1187 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
1188 	pkt = malloc(sizeof(*pkt) + size_res,
1189 	    M_DEVBUF, M_WAITOK | M_ZERO);
1190 
1191 	for (wslot = 0; wslot < 256; wslot++) {
1192 		hpdev = get_pcichild_wslot(hbus, wslot);
1193 		if (!hpdev)
1194 			continue;
1195 
1196 		init_completion(&comp_pkt.host_event);
1197 
1198 		memset(pkt, 0, sizeof(*pkt) + size_res);
1199 		pkt->completion_func = hv_pci_generic_compl;
1200 		pkt->compl_ctxt = &comp_pkt;
1201 
1202 		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) {
1203 			res_assigned =
1204 			    (struct pci_resources_assigned *)&pkt->message;
1205 			res_assigned->message_type.type =
1206 			    PCI_RESOURCES_ASSIGNED;
1207 			res_assigned->wslot.val = hpdev->desc.wslot.val;
1208 		} else {
1209 			res_assigned2 =
1210 			    (struct pci_resources_assigned2 *)&pkt->message;
1211 			res_assigned2->message_type.type =
1212 			    PCI_RESOURCES_ASSIGNED2;
1213 			res_assigned2->wslot.val = hpdev->desc.wslot.val;
1214 		}
1215 
1216 		ret = vmbus_chan_send(hbus->sc->chan,
1217 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1218 		    &pkt->message, size_res,
1219 		    (uint64_t)(uintptr_t)pkt);
1220 		if (!ret)
1221 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1222 
1223 		free_completion(&comp_pkt.host_event);
1224 
1225 		if (ret)
1226 			break;
1227 
1228 		if (comp_pkt.completion_status < 0) {
1229 			ret = EPROTO;
1230 			device_printf(hbus->pcib,
1231 			    "failed to send PCI_RESOURCES_ASSIGNED\n");
1232 			break;
1233 		}
1234 	}
1235 
1236 	free(pkt, M_DEVBUF);
1237 	return (ret);
1238 }
1239 
1240 static int
1241 hv_send_resources_released(struct hv_pcibus *hbus)
1242 {
1243 	struct pci_child_message pkt;
1244 	struct hv_pci_dev *hpdev;
1245 	uint32_t wslot;
1246 	int ret;
1247 
1248 	for (wslot = 0; wslot < 256; wslot++) {
1249 		hpdev = get_pcichild_wslot(hbus, wslot);
1250 		if (!hpdev)
1251 			continue;
1252 
1253 		pkt.message_type.type = PCI_RESOURCES_RELEASED;
1254 		pkt.wslot.val = hpdev->desc.wslot.val;
1255 
1256 		ret = vmbus_chan_send(hbus->sc->chan,
1257 		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1258 		if (ret)
1259 			return (ret);
1260 	}
1261 
1262 	return (0);
1263 }
1264 
1265 #define hv_cfg_read(x, s)						\
1266 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
1267     bus_size_t offset)							\
1268 {									\
1269 	return (bus_read_##s(bus->cfg_res, offset));			\
1270 }
1271 
1272 #define hv_cfg_write(x, s)						\
1273 static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
1274     bus_size_t offset, uint##x##_t val)					\
1275 {									\
1276 	return (bus_write_##s(bus->cfg_res, offset, val));		\
1277 }
1278 
1279 hv_cfg_read(8, 1)
1280 hv_cfg_read(16, 2)
1281 hv_cfg_read(32, 4)
1282 
1283 hv_cfg_write(8, 1)
1284 hv_cfg_write(16, 2)
1285 hv_cfg_write(32, 4)
1286 
1287 static void
1288 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
1289     uint32_t *val)
1290 {
1291 	struct hv_pcibus *hbus = hpdev->hbus;
1292 	bus_size_t addr = CFG_PAGE_OFFSET + where;
1293 
1294 	/*
1295 	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
1296 	 */
1297 	if (where + size <= PCIR_COMMAND) {
1298 		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
1299 	} else if (where >= PCIR_REVID && where + size <=
1300 		   PCIR_CACHELNSZ) {
1301 		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
1302 		       PCIR_REVID, size);
1303 	} else if (where >= PCIR_SUBVEND_0 && where + size <=
1304 		   PCIR_BIOS) {
1305 		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
1306 		       PCIR_SUBVEND_0, size);
1307 	} else if (where >= PCIR_BIOS && where + size <=
1308 		   PCIR_CAP_PTR) {
1309 		/* ROM BARs are unimplemented */
1310 		*val = 0;
1311 	} else if ((where >= PCIR_INTLINE && where + size <=
1312 		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
1313 		/*
1314 		 * Interrupt Line and Interrupt PIN are hard-wired to zero
1315 		 * because this front-end only supports message-signaled
1316 		 * interrupts.
1317 		 */
1318 		*val = 0;
1319 	} else if (where + size <= CFG_PAGE_SIZE) {
1320 		mtx_lock(&hbus->config_lock);
1321 
1322 		/* Choose the function to be read. */
1323 		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1324 
1325 		/* Make sure the function was chosen before we start reading.*/
1326 		mb();
1327 
1328 		/* Read from that function's config space. */
1329 		switch (size) {
1330 		case 1:
1331 			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
1332 			break;
1333 		case 2:
1334 			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
1335 			break;
1336 		default:
1337 			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
1338 			break;
1339 		}
1340 		/*
1341 		 * Make sure the write was done before we release the lock,
1342 		 * allowing consecutive reads/writes.
1343 		 */
1344 		mb();
1345 
1346 		mtx_unlock(&hbus->config_lock);
1347 	} else {
1348 		/* Invalid config read: it's unlikely to reach here. */
1349 		memset(val, 0, size);
1350 	}
1351 }
1352 
1353 static void
1354 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
1355     uint32_t val)
1356 {
1357 	struct hv_pcibus *hbus = hpdev->hbus;
1358 	bus_size_t addr = CFG_PAGE_OFFSET + where;
1359 
1360 	/* SSIDs and ROM BARs are read-only */
1361 	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
1362 		return;
1363 
1364 	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
1365 		mtx_lock(&hbus->config_lock);
1366 
1367 		/* Choose the function to be written. */
1368 		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1369 
1370 		/* Make sure the function was chosen before we start writing.*/
1371 		wmb();
1372 
1373 		/* Write to that function's config space. */
1374 		switch (size) {
1375 		case 1:
1376 			hv_cfg_write_1(hbus, addr, (uint8_t)val);
1377 			break;
1378 		case 2:
1379 			hv_cfg_write_2(hbus, addr, (uint16_t)val);
1380 			break;
1381 		default:
1382 			hv_cfg_write_4(hbus, addr, (uint32_t)val);
1383 			break;
1384 		}
1385 
1386 		/*
1387 		 * Make sure the write was done before we release the lock,
1388 		 * allowing consecutive reads/writes.
1389 		 */
1390 		mb();
1391 
1392 		mtx_unlock(&hbus->config_lock);
1393 	} else {
1394 		/* Invalid config write: it's unlikely to reach here. */
1395 		return;
1396 	}
1397 }
1398 
1399 /*
1400  * The vPCI in some Hyper-V releases do not initialize the last 4
1401  * bit of BAR registers. This could result weird problems causing PCI
1402  * code fail to configure BAR correctly.
1403  *
1404  * Just write all 1's to those BARs whose probed values are not zero.
1405  * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1406  * correctly.
1407  */
1408 
1409 static void
1410 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1411 {
1412 	struct hv_pci_dev *hpdev;
1413 	int i;
1414 
1415 	mtx_lock(&hbus->device_list_lock);
1416 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
1417 		for (i = 0; i < 6; i++) {
1418 			/* Ignore empty bar */
1419 			if (hpdev->probed_bar[i] == 0)
1420 				continue;
1421 
1422 			uint32_t bar_val = 0;
1423 
1424 			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1425 			    4, &bar_val);
1426 
1427 			if (hpdev->probed_bar[i] != bar_val) {
1428 				if (bootverbose)
1429 					printf("vmbus_pcib: initialize bar %d "
1430 					    "by writing all 1s\n", i);
1431 
1432 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1433 				    4, 0xffffffff);
1434 
1435 				/* Now write the original value back */
1436 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1437 				    4, bar_val);
1438 			}
1439 		}
1440 	}
1441 	mtx_unlock(&hbus->device_list_lock);
1442 }
1443 
1444 static void
1445 vmbus_pcib_set_detaching(void *arg, int pending __unused)
1446 {
1447 	struct hv_pcibus *hbus = arg;
1448 
1449 	atomic_set_int(&hbus->detaching, 1);
1450 }
1451 
1452 static void
1453 vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
1454 {
1455 	struct task task;
1456 
1457 	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);
1458 
1459 	/*
1460 	 * Make sure the channel callback won't push any possible new
1461 	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
1462 	 */
1463 	vmbus_chan_run_task(hbus->sc->chan, &task);
1464 
1465 	taskqueue_drain_all(hbus->sc->taskq);
1466 }
1467 
1468 
1469 /*
1470  * Standard probe entry point.
1471  *
1472  */
1473 static int
1474 vmbus_pcib_probe(device_t dev)
1475 {
1476 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1477 	    &g_pass_through_dev_type) == 0) {
1478 		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1479 		return (BUS_PROBE_DEFAULT);
1480 	}
1481 	return (ENXIO);
1482 }
1483 
/*
 * Standard attach entry point.
 *
 * Allocates and initializes the per-bus state (struct hv_pcibus), reserves
 * the MMIO window used for config space access, opens the VMBus channel,
 * and then walks the host through the bring-up sequence: protocol version
 * negotiation, bus relations query, D0 entry and "resources assigned".
 * Finally a "pci" child bus is added and attached.
 *
 * Returns 0 on success.  On failure everything acquired so far is released
 * through the labels at the end of the function (in reverse order of
 * acquisition) and an errno value is returned.
 */
static int
vmbus_pcib_attach(device_t dev)
{
	const int pci_ring_size = (4 * PAGE_SIZE);
	const struct hyperv_guid *inst_guid;
	struct vmbus_channel *channel;
	struct vmbus_pcib_softc *sc;
	struct hv_pcibus *hbus;
	int rid = 0;
	int ret;

	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
	hbus->pcib = dev;

	/*
	 * The PCI domain number is built from bytes 8 and 9 of the
	 * channel's instance GUID (byte 8 is the high byte).
	 */
	channel = vmbus_get_channel(dev);
	inst_guid = vmbus_chan_guid_inst(channel);
	hbus->pci_domain = inst_guid->hv_guid[9] |
			  (inst_guid->hv_guid[8] << 8);

	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
	TAILQ_INIT(&hbus->children);
	TAILQ_INIT(&hbus->dr_list);

	/* Page-aligned MMIO window used for config space access. */
	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));

	if (!hbus->cfg_res) {
		device_printf(dev, "failed to get resource for cfg window\n");
		ret = ENXIO;
		goto free_bus;
	}

	sc = device_get_softc(dev);
	sc->chan = channel;
	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
	sc->hbus = hbus;

	/*
	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
	 * messages. NB: we can't handle the messages in the channel callback
	 * directly, because the message handlers need to send new messages
	 * to the host and waits for the host's completion messages, which
	 * must also be handled by the channel callback.
	 */
	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->taskq);
	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");

	hbus->sc = sc;

	/* Completion waited on below after the bus relations query. */
	init_completion(&hbus->query_completion);
	hbus->query_comp = &hbus->query_completion;

	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
		NULL, 0, vmbus_pcib_on_channel_callback, sc);
	if (ret)
		goto free_res;

	ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions,
	    ARRAY_SIZE(pci_protocol_versions));
	if (ret)
		goto vmbus_close;

	/* Ask for the child list and wait for the query to complete. */
	ret = hv_pci_query_relations(hbus);
	if (!ret)
		ret = wait_for_response(hbus, hbus->query_comp);

	if (ret)
		goto vmbus_close;

	ret = hv_pci_enter_d0(hbus);
	if (ret)
		goto vmbus_close;

	ret = hv_send_resources_allocated(hbus);
	if (ret)
		goto vmbus_close;

	vmbus_pcib_prepopulate_bars(hbus);

	hbus->pci_bus = device_add_child(dev, "pci", -1);
	if (!hbus->pci_bus) {
		device_printf(dev, "failed to create pci bus\n");
		ret = ENXIO;
		goto vmbus_close;
	}

	/* NOTE(review): the return value of bus_generic_attach() is ignored. */
	bus_generic_attach(dev);

	hbus->state = hv_pcibus_installed;

	return (0);

	/*
	 * Error unwinding.
	 *
	 * NOTE(review): unlike vmbus_pcib_detach(), this path does not call
	 * hv_pci_devices_present() with an empty relations list; confirm that
	 * no child entries can be left on hbus->children when we get here.
	 * sc->hbus is also left pointing at the freed hbus.
	 */
vmbus_close:
	vmbus_pcib_pre_detach(hbus);
	vmbus_chan_close(sc->chan);
free_res:
	taskqueue_free(sc->taskq);
	free_completion(&hbus->query_completion);
	free(sc->rx_buf, M_DEVBUF);
	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
free_bus:
	mtx_destroy(&hbus->device_list_lock);
	mtx_destroy(&hbus->config_lock);
	free(hbus, M_DEVBUF);
	return (ret);
}
1597 
/*
 * Standard detach entry point.
 *
 * Quiesces the channel callback and task queue, detaches the child bus if
 * attach had completed, removes any remaining child devices, tells the
 * host that resources are released and that the bus is leaving D0, then
 * closes the channel and frees everything allocated by attach.
 *
 * Failures to send the teardown messages are only logged; the function
 * always returns 0.
 */
static int
vmbus_pcib_detach(device_t dev)
{
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct hv_pcibus *hbus = sc->hbus;
	struct pci_message teardown_packet;
	struct pci_bus_relations relations;
	int ret;

	/* Stop new tasks from being queued and drain the existing ones. */
	vmbus_pcib_pre_detach(hbus);

	if (hbus->state == hv_pcibus_installed)
		bus_generic_detach(dev);

	/* Delete any children which might still exist. */
	memset(&relations, 0, sizeof(relations));
	hv_pci_devices_present(hbus, &relations);

	ret = hv_send_resources_released(hbus);
	if (ret)
		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");

	/* Tell the host the bus is leaving the powered-on state. */
	teardown_packet.type = PCI_BUS_D0EXIT;
	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    &teardown_packet, sizeof(struct pci_message), 0);
	if (ret)
		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");

	/* Drain once more before the channel and the task queue go away. */
	taskqueue_drain_all(hbus->sc->taskq);
	vmbus_chan_close(sc->chan);
	taskqueue_free(sc->taskq);

	free_completion(&hbus->query_completion);
	free(sc->rx_buf, M_DEVBUF);
	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);

	mtx_destroy(&hbus->device_list_lock);
	mtx_destroy(&hbus->config_lock);
	free(hbus, M_DEVBUF);

	return (0);
}
1643 
1644 static int
1645 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1646 {
1647 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1648 
1649 	switch (which) {
1650 	case PCIB_IVAR_DOMAIN:
1651 		*val = sc->hbus->pci_domain;
1652 		return (0);
1653 
1654 	case PCIB_IVAR_BUS:
1655 		/* There is only bus 0. */
1656 		*val = 0;
1657 		return (0);
1658 	}
1659 	return (ENOENT);
1660 }
1661 
/*
 * Bus method: write an instance variable.  No instance variable is
 * writable on this bridge, so every request is rejected with ENOENT.
 */
static int
vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
{
	return (ENOENT);
}
1667 
1668 static struct resource *
1669 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
1670 	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1671 {
1672 	unsigned int bar_no;
1673 	struct hv_pci_dev *hpdev;
1674 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1675 	struct resource *res;
1676 	unsigned int devfn;
1677 
1678 	if (type == PCI_RES_BUS)
1679 		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
1680 		    start, end, count, flags));
1681 
1682 	/* Devices with port I/O BAR are not supported. */
1683 	if (type == SYS_RES_IOPORT)
1684 		return (NULL);
1685 
1686 	if (type == SYS_RES_MEMORY) {
1687 		devfn = PCI_DEVFN(pci_get_slot(child),
1688 		    pci_get_function(child));
1689 		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1690 		if (!hpdev)
1691 			return (NULL);
1692 
1693 		bar_no = PCI_RID2BAR(*rid);
1694 		if (bar_no >= MAX_NUM_BARS)
1695 			return (NULL);
1696 
1697 		/* Make sure a 32-bit BAR gets a 32-bit address */
1698 		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
1699 			end = ulmin(end, 0xFFFFFFFF);
1700 	}
1701 
1702 	res = bus_generic_alloc_resource(dev, child, type, rid,
1703 		start, end, count, flags);
1704 	/*
1705 	 * If this is a request for a specific range, assume it is
1706 	 * correct and pass it up to the parent.
1707 	 */
1708 	if (res == NULL && start + count - 1 == end)
1709 		res = bus_generic_alloc_resource(dev, child, type, rid,
1710 		    start, end, count, flags);
1711 	if (res) {
1712 		device_printf(dev,"vmbus_pcib_alloc_resource is successful\n");
1713 	}
1714 	return (res);
1715 }
1716 
1717 static int
1718 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
1719     struct resource *r)
1720 {
1721 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1722 
1723 	if (type == PCI_RES_BUS)
1724 		return (pci_domain_release_bus(sc->hbus->pci_domain, child,
1725 		    rid, r));
1726 
1727 	if (type == SYS_RES_IOPORT)
1728 		return (EINVAL);
1729 
1730 	return (bus_generic_release_resource(dev, child, type, rid, r));
1731 }
1732 
/*
 * Bus method: report the CPU set for 'dev'.  The request is answered on
 * behalf of the bridge itself by calling bus_get_cpus() with 'pcib'; the
 * requesting child 'dev' is not consulted.
 */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	return (bus_get_cpus(pcib, op, setsize, cpuset));
}
1739 
1740 static uint32_t
1741 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1742     u_int reg, int bytes)
1743 {
1744 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1745 	struct hv_pci_dev *hpdev;
1746 	unsigned int devfn = PCI_DEVFN(slot, func);
1747 	uint32_t data = 0;
1748 
1749 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1750 
1751 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1752 	if (!hpdev)
1753 		return (~0);
1754 
1755 	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
1756 
1757 	return (data);
1758 }
1759 
1760 static void
1761 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1762     u_int reg, uint32_t data, int bytes)
1763 {
1764 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1765 	struct hv_pci_dev *hpdev;
1766 	unsigned int devfn = PCI_DEVFN(slot, func);
1767 
1768 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1769 
1770 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1771 	if (!hpdev)
1772 		return;
1773 
1774 	_hv_pcifront_write_config(hpdev, reg, bytes, data);
1775 }
1776 
/*
 * pcib method: route a legacy INTx interrupt pin.  Always fails with
 * PCI_INVALID_IRQ regardless of the device or pin.
 */
static int
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
{
	/* We only support MSI/MSI-X and don't support INTx interrupt. */
	return (PCI_INVALID_IRQ);
}
1783 
1784 static int
1785 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
1786     int maxcount, int *irqs)
1787 {
1788 #if defined(__amd64__) || defined(__i386__)
1789 	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
1790 	    irqs));
1791 #endif
1792 #if defined(__aarch64__)
1793 	return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount,
1794 	    irqs));
1795 #endif
1796 }
1797 
1798 static int
1799 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
1800 {
1801 #if defined(__amd64__) || defined(__i386__)
1802 	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
1803 #endif
1804 #if defined(__aarch64__)
1805 	return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs));
1806 #endif
1807 }
1808 
1809 static int
1810 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1811 {
1812 #if defined(__aarch64__)
1813 	int ret;
1814 #if defined(INTRNG)
1815 	ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq);
1816 	return ret;
1817 #else
1818     return (ENXIO);
1819 #endif
1820 #else
1821 	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1822 #endif /* __aarch64__ */
1823 }
1824 
1825 static int
1826 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
1827 {
1828 #if defined(__aarch64__)
1829 	return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq));
1830 #else
1831 	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
1832 #endif /* __aarch64__ */
1833 }
1834 
/*
 * Per-architecture constants used by vmbus_pcib_map_msi().
 *
 * MSI_INTEL_ADDR_DEST is the mask applied to the MSI address before it is
 * shifted right by 12 to obtain the destination CPU, MSI_INTEL_DATA_INTVEC
 * is the mask applied to the MSI data to obtain the vector, and
 * MSI_INTEL_DATA_DELFIXED is the delivery mode placed in the interrupt
 * descriptor sent to the host.
 *
 * On aarch64 MSI_INTEL_DATA_INTVEC is not defined; the only use of it in
 * vmbus_pcib_map_msi() is in the non-aarch64 branch.
 */
#if defined(__aarch64__)
#define	MSI_INTEL_ADDR_DEST	0x00000000
#define	MSI_INTEL_DATA_DELFIXED 0x0
#endif
#if defined(__amd64__) || defined(__i386__)
#define MSI_INTEL_ADDR_DEST 0x000ff000
#define MSI_INTEL_DATA_INTVEC   IOART_INTVEC    /* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
#endif
1844 
1845 static int
1846 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
1847     uint64_t *addr, uint32_t *data)
1848 {
1849 	unsigned int devfn;
1850 	struct hv_pci_dev *hpdev;
1851 
1852 	uint64_t v_addr;
1853 	uint32_t v_data;
1854 	struct hv_irq_desc *hid, *tmp_hid;
1855 	unsigned int cpu, vcpu_id;
1856 	unsigned int vector;
1857 
1858 	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
1859 	struct compose_comp_ctxt comp;
1860 	struct {
1861 		struct pci_packet pkt;
1862 		union {
1863 			struct pci_create_interrupt v1;
1864 			struct pci_create_interrupt3 v3;
1865 		}int_pkts;
1866 	} ctxt;
1867 	int ret;
1868 	uint32_t size;
1869 
1870 	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
1871 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1872 	if (!hpdev)
1873 		return (ENOENT);
1874 #if defined(__aarch64__)
1875 	ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq,
1876 	    &v_addr, &v_data);
1877 #else
1878 	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
1879             &v_addr, &v_data);
1880 #endif
1881 	if (ret)
1882 		return (ret);
1883 
1884 	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
1885 		if (hid->irq == irq) {
1886 			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
1887 			hv_int_desc_free(hpdev, hid);
1888 			break;
1889 		}
1890 	}
1891 
1892 #if defined(__aarch64__)
1893 	cpu = 0;
1894 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1895 	vector = v_data;
1896 #else
1897 	cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
1898 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1899 	vector = v_data & MSI_INTEL_DATA_INTVEC;
1900 #endif
1901 
1902 	init_completion(&comp.comp_pkt.host_event);
1903 
1904 	memset(&ctxt, 0, sizeof(ctxt));
1905 	ctxt.pkt.completion_func = hv_pci_compose_compl;
1906 	ctxt.pkt.compl_ctxt = &comp;
1907 	switch (hpdev->hbus->protocol_version) {
1908 	case PCI_PROTOCOL_VERSION_1_1:
1909 		ctxt.int_pkts.v1.message_type.type =
1910 		    PCI_CREATE_INTERRUPT_MESSAGE;
1911 		ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val;
1912 		ctxt.int_pkts.v1.int_desc.vector = vector;
1913 		ctxt.int_pkts.v1.int_desc.vector_count = 1;
1914 		ctxt.int_pkts.v1.int_desc.delivery_mode =
1915 		    MSI_INTEL_DATA_DELFIXED;
1916 		ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id;
1917 		size = sizeof(ctxt.int_pkts.v1);
1918 		break;
1919 
1920 	case PCI_PROTOCOL_VERSION_1_4:
1921 		ctxt.int_pkts.v3.message_type.type =
1922 		    PCI_CREATE_INTERRUPT_MESSAGE3;
1923 		ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val;
1924 		ctxt.int_pkts.v3.int_desc.vector = vector;
1925 		ctxt.int_pkts.v3.int_desc.vector_count = 1;
1926 		ctxt.int_pkts.v3.int_desc.reserved = 0;
1927 		ctxt.int_pkts.v3.int_desc.delivery_mode =
1928 		    MSI_INTEL_DATA_DELFIXED;
1929 		ctxt.int_pkts.v3.int_desc.processor_count = 1;
1930 		ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id;
1931 		size = sizeof(ctxt.int_pkts.v3);
1932 		break;
1933 	}
1934 	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
1935 	    VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size,
1936 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1937 	if (ret) {
1938 		free_completion(&comp.comp_pkt.host_event);
1939 		return (ret);
1940 	}
1941 
1942 	wait_for_completion(&comp.comp_pkt.host_event);
1943 	free_completion(&comp.comp_pkt.host_event);
1944 
1945 	if (comp.comp_pkt.completion_status < 0) {
1946 		device_printf(pcib,
1947 		    "vmbus_pcib_map_msi completion_status %d\n",
1948 		    comp.comp_pkt.completion_status);
1949 		return (EPROTO);
1950 	}
1951 
1952 	*addr = comp.int_desc.address;
1953 	*data = comp.int_desc.data;
1954 
1955 	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
1956 	hid->irq = irq;
1957 	hid->desc = comp.int_desc;
1958 	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
1959 
1960 	return (0);
1961 }
1962 
/*
 * Method table for the vmbus_pcib driver.  Entries prefixed with
 * vmbus_pcib_ are implemented in this file; the remaining entries use the
 * generic bus/pcib implementations named in the table.
 */
static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         vmbus_pcib_probe),
	DEVMETHOD(device_attach,        vmbus_pcib_attach),
	DEVMETHOD(device_detach,        vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource,   bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
	DEVMETHOD(pcib_request_feature,		pcib_request_feature_allow),

	DEVMETHOD_END
};
1997 
/*
 * Driver class "pcib" using the method table above, with
 * struct vmbus_pcib_softc as the per-device softc.  The driver is
 * registered on the "vmbus" bus and declares module dependencies on
 * vmbus and pci.
 */
DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
		sizeof(struct vmbus_pcib_softc));
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
2003 
2004 #endif /* NEW_PCIB */
2005