xref: /freebsd/sys/dev/hyperv/pcib/vmbus_pcib.c (revision a1f8a0c793c67ab5854035e017f34d3d016b6d0d)
1 /*-
2  * Copyright (c) 2016-2017 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #ifdef NEW_PCIB
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/types.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/queue.h>
40 #include <sys/lock.h>
41 #include <sys/sx.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 #include <sys/mutex.h>
47 #include <sys/errno.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_kern.h>
52 #include <vm/pmap.h>
53 
54 #if defined(__aarch64__)
55 #include <arm64/include/intr.h>
56 #endif
57 #include <machine/atomic.h>
58 #include <machine/bus.h>
59 #include <machine/frame.h>
60 #include <machine/pci_cfgreg.h>
61 #include <machine/resource.h>
62 
63 #include <sys/pciio.h>
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66 #include <dev/pci/pci_private.h>
67 #include <dev/pci/pcib_private.h>
68 #include "pcib_if.h"
69 #if defined(__i386__) || defined(__amd64__)
70 #include <machine/intr_machdep.h>
71 #include <x86/apicreg.h>
72 #endif
73 #if defined(__aarch64__)
74 #include <contrib/dev/acpica/include/acpi.h>
75 #include <contrib/dev/acpica/include/accommon.h>
76 #include <dev/acpica/acpivar.h>
77 #include <dev/acpica/acpi_pcibvar.h>
78 #endif
79 #include <dev/hyperv/include/hyperv.h>
80 #include <dev/hyperv/include/hyperv_busdma.h>
81 #include <dev/hyperv/include/vmbus_xact.h>
82 #include <dev/hyperv/vmbus/vmbus_reg.h>
83 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
84 
85 #include "vmbus_if.h"
86 
87 #if __FreeBSD_version < 1100000
88 typedef u_long rman_res_t;
89 #define RM_MAX_END	(~(rman_res_t)0)
90 #endif
91 
/*
 * Minimal Linux-style completion object: 'done' counts signaled-but-not-
 * yet-consumed completions, protected by 'lock'.  The completion pointer
 * itself is used as the sleep/wakeup channel.
 */
struct completion {
	unsigned int done;	/* completions signaled but not consumed */
	struct mtx lock;	/* protects 'done' */
};
96 
97 static void
98 init_completion(struct completion *c)
99 {
100 	memset(c, 0, sizeof(*c));
101 	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
102 	c->done = 0;
103 }
/*
 * Reset an already-initialized completion for reuse.  The caller must
 * ensure no thread is currently waiting on it.
 */
static void
reinit_completion(struct completion *c)
{
	c->done = 0;
}
/* Destroy the completion's mutex; counterpart of init_completion(). */
static void
free_completion(struct completion *c)
{
	mtx_destroy(&c->lock);
}
114 
/*
 * Signal a completion: account one 'done' event under the lock, then
 * wake any thread sleeping in wait_for_completion*() on channel 'c'.
 */
static void
complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done++;
	mtx_unlock(&c->lock);
	wakeup(c);
}
123 
/*
 * Block (untimed) until the completion is signaled, then consume one
 * 'done' event.  Sleeps interlocked on c->lock to avoid lost wakeups.
 */
static void
wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
	c->done--;
	mtx_unlock(&c->lock);
}
133 
134 /*
135  * Return: 0 if completed, a non-zero value if timed out.
136  */
137 static int
138 wait_for_completion_timeout(struct completion *c, int timeout)
139 {
140 	int ret;
141 
142 	mtx_lock(&c->lock);
143 
144 	if (c->done == 0)
145 		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);
146 
147 	if (c->done > 0) {
148 		c->done--;
149 		ret = 0;
150 	} else {
151 		ret = 1;
152 	}
153 
154 	mtx_unlock(&c->lock);
155 
156 	return (ret);
157 }
158 
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

/* Wire protocol versions, encoded as (major << 16) | minor. */
enum pci_protocol_version_t {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),
};

/* Versions offered to the host during negotiation, most preferred first. */
static enum pci_protocol_version_t pci_protocol_versions[] = {
	PCI_PROTOCOL_VERSION_1_4,
	PCI_PROTOCOL_VERSION_1_1,
};

/* Per-device config MMIO window; config data lives at CFG_PAGE_OFFSET. */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)

/*
 * Message Types
 */

enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE                = 0x42490000,
	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2         = PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x17,
	PCI_DELETE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x18, /* unused */
	PCI_BUS_RELATIONS2              = PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
	PCI_MESSAGE_MAXIMUM
};

/* NTSTATUS-style code the host returns when rejecting a protocol version. */
#define STATUS_REVISION_MISMATCH 0xC0000059
215 
216 /*
217  * Structures defining the virtual PCI Express protocol.
218  */
219 
220 union pci_version {
221 	struct {
222 		uint16_t minor_version;
223 		uint16_t major_version;
224 	} parts;
225 	uint32_t version;
226 } __packed;
227 
228 /*
229  * This representation is the one used in Windows, which is
230  * what is expected when sending this back and forth with
231  * the Hyper-V parent partition.
232  */
233 union win_slot_encoding {
234 	struct {
235 		uint32_t	slot:5;
236 		uint32_t	func:3;
237 		uint32_t	reserved:24;
238 	} bits;
239 	uint32_t val;
240 } __packed;
241 
242 struct pci_func_desc {
243 	uint16_t	v_id;	/* vendor ID */
244 	uint16_t	d_id;	/* device ID */
245 	uint8_t		rev;
246 	uint8_t		prog_intf;
247 	uint8_t		subclass;
248 	uint8_t		base_class;
249 	uint32_t	subsystem_id;
250 	union win_slot_encoding wslot;
251 	uint32_t	ser;	/* serial number */
252 } __packed;
253 
254 struct pci_func_desc2 {
255 	uint16_t	v_id;	/* vendor ID */
256 	uint16_t	d_id;	/* device ID */
257 	uint8_t		rev;
258 	uint8_t		prog_intf;
259 	uint8_t		subclass;
260 	uint8_t		base_class;
261 	uint32_t	subsystem_id;
262 	union		win_slot_encoding wslot;
263 	uint32_t	ser;	/* serial number */
264 	uint32_t	flags;
265 	uint16_t	virtual_numa_node;
266 	uint16_t	reserved;
267 } __packed;
268 
269 
270 struct hv_msi_desc {
271 	uint8_t		vector;
272 	uint8_t		delivery_mode;
273 	uint16_t	vector_count;
274 	uint32_t	reserved;
275 	uint64_t	cpu_mask;
276 } __packed;
277 
278 struct hv_msi_desc3 {
279 	uint32_t	vector;
280 	uint8_t		delivery_mode;
281 	uint8_t		reserved;
282 	uint16_t	vector_count;
283 	uint16_t	processor_count;
284 	uint16_t	processor_array[32];
285 } __packed;
286 
287 struct tran_int_desc {
288 	uint16_t	reserved;
289 	uint16_t	vector_count;
290 	uint32_t	data;
291 	uint64_t	address;
292 } __packed;
293 
294 struct pci_message {
295 	uint32_t type;
296 } __packed;
297 
298 struct pci_child_message {
299 	struct pci_message message_type;
300 	union win_slot_encoding wslot;
301 } __packed;
302 
303 struct pci_incoming_message {
304 	struct vmbus_chanpkt_hdr hdr;
305 	struct pci_message message_type;
306 } __packed;
307 
308 struct pci_response {
309 	struct vmbus_chanpkt_hdr hdr;
310 	int32_t status;	/* negative values are failures */
311 } __packed;
312 
313 struct pci_packet {
314 	void (*completion_func)(void *context, struct pci_response *resp,
315 	    int resp_packet_size);
316 	void *compl_ctxt;
317 
318 	struct pci_message message[0];
319 };
320 
321 /*
322  * Specific message types supporting the PCI protocol.
323  */
324 
325 struct pci_version_request {
326 	struct pci_message message_type;
327 	uint32_t protocol_version;
328 	uint32_t reservedz:31;
329 } __packed;
330 
331 struct pci_bus_d0_entry {
332 	struct pci_message message_type;
333 	uint32_t reserved;
334 	uint64_t mmio_base;
335 } __packed;
336 
337 struct pci_bus_relations {
338 	struct pci_incoming_message incoming;
339 	uint32_t device_count;
340 	struct pci_func_desc func[0];
341 } __packed;
342 
343 struct pci_bus_relations2 {
344 	struct pci_incoming_message incoming;
345 	uint32_t device_count;
346 	struct pci_func_desc2 func[0];
347 } __packed;
348 
349 #define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
350 struct pci_q_res_req_response {
351 	struct vmbus_chanpkt_hdr hdr;
352 	int32_t status; /* negative values are failures */
353 	uint32_t probed_bar[MAX_NUM_BARS];
354 } __packed;
355 
356 struct pci_resources_assigned {
357 	struct pci_message message_type;
358 	union win_slot_encoding wslot;
359 	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
360 	uint32_t msi_descriptors;
361 	uint32_t reserved[4];
362 } __packed;
363 
364 struct pci_resources_assigned2 {
365 	struct pci_message message_type;
366 	union win_slot_encoding wslot;
367 	uint8_t memory_range[0x14][6];   /* not used here */
368 	uint32_t msi_descriptor_count;
369 	uint8_t reserved[70];
370 } __packed;
371 
372 struct pci_create_interrupt {
373 	struct pci_message message_type;
374 	union win_slot_encoding wslot;
375 	struct hv_msi_desc int_desc;
376 } __packed;
377 
378 struct pci_create_interrupt3 {
379 	struct pci_message message_type;
380 	union win_slot_encoding wslot;
381 	struct hv_msi_desc3 int_desc;
382 } __packed;
383 
384 struct pci_create_int_response {
385 	struct pci_response response;
386 	uint32_t reserved;
387 	struct tran_int_desc int_desc;
388 } __packed;
389 
390 struct pci_delete_interrupt {
391 	struct pci_message message_type;
392 	union win_slot_encoding wslot;
393 	struct tran_int_desc int_desc;
394 } __packed;
395 
396 struct pci_dev_incoming {
397 	struct pci_incoming_message incoming;
398 	union win_slot_encoding wslot;
399 } __packed;
400 
401 struct pci_eject_response {
402 	struct pci_message message_type;
403 	union win_slot_encoding wslot;
404 	uint32_t status;
405 } __packed;
406 
407 /*
408  * Driver specific state.
409  */
410 
411 enum hv_pcibus_state {
412 	hv_pcibus_init = 0,
413 	hv_pcibus_installed,
414 };
415 
416 struct hv_pcibus {
417 	device_t pcib;
418 	device_t pci_bus;
419 	struct vmbus_pcib_softc *sc;
420 
421 	uint16_t pci_domain;
422 
423 	enum hv_pcibus_state state;
424 
425 	struct resource *cfg_res;
426 
427 	struct completion query_completion, *query_comp;
428 
429 	struct mtx config_lock; /* Avoid two threads writing index page */
430 	struct mtx device_list_lock;    /* Protect lists below */
431 	uint32_t protocol_version;
432 	TAILQ_HEAD(, hv_pci_dev) children;
433 	TAILQ_HEAD(, hv_dr_state) dr_list;
434 
435 	volatile int detaching;
436 };
437 
438 struct hv_pcidev_desc {
439 	uint16_t v_id;	/* vendor ID */
440 	uint16_t d_id;	/* device ID */
441 	uint8_t rev;
442 	uint8_t prog_intf;
443 	uint8_t subclass;
444 	uint8_t base_class;
445 	uint32_t subsystem_id;
446 	union win_slot_encoding wslot;
447 	uint32_t ser;	/* serial number */
448 	uint32_t flags;
449 	uint16_t virtual_numa_node;
450 } __packed;
451 
452 struct hv_pci_dev {
453 	TAILQ_ENTRY(hv_pci_dev) link;
454 
455 	struct hv_pcidev_desc desc;
456 
457 	bool reported_missing;
458 
459 	struct hv_pcibus *hbus;
460 	struct task eject_task;
461 
462 	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;
463 
464 	/*
465 	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
466 	 * read it back, for each of the BAR offsets within config space.
467 	 */
468 	uint32_t probed_bar[MAX_NUM_BARS];
469 };
470 
471 /*
472  * Tracks "Device Relations" messages from the host, which must be both
473  * processed in order.
474  */
475 struct hv_dr_work {
476 	struct task task;
477 	struct hv_pcibus *bus;
478 };
479 
480 struct hv_dr_state {
481 	TAILQ_ENTRY(hv_dr_state) link;
482 	uint32_t device_count;
483 	struct hv_pcidev_desc func[0];
484 };
485 
486 struct hv_irq_desc {
487 	TAILQ_ENTRY(hv_irq_desc) link;
488 	struct tran_int_desc desc;
489 	int irq;
490 };
491 
492 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
493 #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
494 #define PCI_FUNC(devfn)         ((devfn) & 0x07)
495 
496 static uint32_t
497 devfn_to_wslot(unsigned int devfn)
498 {
499 	union win_slot_encoding wslot;
500 
501 	wslot.val = 0;
502 	wslot.bits.slot = PCI_SLOT(devfn);
503 	wslot.bits.func = PCI_FUNC(devfn);
504 
505 	return (wslot.val);
506 }
507 
508 static unsigned int
509 wslot_to_devfn(uint32_t wslot)
510 {
511 	union win_slot_encoding encoding;
512 	unsigned int slot;
513 	unsigned int func;
514 
515 	encoding.val = wslot;
516 
517 	slot = encoding.bits.slot;
518 	func = encoding.bits.func;
519 
520 	return (PCI_DEVFN(slot, func));
521 }
522 
/* Per-device softc for the vmbus_pcib driver instance. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;
	void *rx_buf;		/* default receive buffer (PCIB_PACKET_SIZE) */

	struct taskqueue	*taskq;

	struct hv_pcibus	*hbus;
};

/* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};

/* Generic request context: completion plus the host's status code. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Context for a PCI_QUERY_RESOURCE_REQUIREMENTS request. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;	/* receives the probed BAR values */
};

/* Context for a create-interrupt request. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;	/* filled from the host's response */
};
552 
553 /*
554  * It is possible the device is revoked during initialization.
555  * Check if this happens during wait.
556  * Return: 0 if response arrived, ENODEV if device revoked.
557  */
558 static int
559 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
560 {
561 	do {
562 		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
563 			device_printf(hbus->pcib,
564 			    "The device is revoked.\n");
565 			return (ENODEV);
566 		}
567 	} while (wait_for_completion_timeout(c, hz /10) != 0);
568 
569 	return 0;
570 }
571 
572 static void
573 hv_pci_generic_compl(void *context, struct pci_response *resp,
574     int resp_packet_size)
575 {
576 	struct hv_pci_compl *comp_pkt = context;
577 
578 	if (resp_packet_size >= sizeof(struct pci_response))
579 		comp_pkt->completion_status = resp->status;
580 	else
581 		comp_pkt->completion_status = -1;
582 
583 	complete(&comp_pkt->host_event);
584 }
585 
586 static void
587 q_resource_requirements(void *context, struct pci_response *resp,
588     int resp_packet_size)
589 {
590 	struct q_res_req_compl *completion = context;
591 	struct pci_q_res_req_response *q_res_req =
592 	    (struct pci_q_res_req_response *)resp;
593 	int i;
594 
595 	if (resp->status < 0) {
596 		printf("vmbus_pcib: failed to query resource requirements\n");
597 	} else {
598 		for (i = 0; i < MAX_NUM_BARS; i++)
599 			completion->hpdev->probed_bar[i] =
600 			    q_res_req->probed_bar[i];
601 	}
602 
603 	complete(&completion->host_event);
604 }
605 
606 static void
607 hv_pci_compose_compl(void *context, struct pci_response *resp,
608     int resp_packet_size)
609 {
610 	struct compose_comp_ctxt *comp_pkt = context;
611 	struct pci_create_int_response *int_resp =
612 	    (struct pci_create_int_response *)resp;
613 
614 	comp_pkt->comp_pkt.completion_status = resp->status;
615 	comp_pkt->int_desc = int_resp->int_desc;
616 	complete(&comp_pkt->comp_pkt.host_event);
617 }
618 
619 static void
620 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
621 {
622 	struct pci_delete_interrupt *int_pkt;
623 	struct {
624 		struct pci_packet pkt;
625 		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
626 	} ctxt;
627 
628 	memset(&ctxt, 0, sizeof(ctxt));
629 	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
630 	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
631 	int_pkt->wslot.val = hpdev->desc.wslot.val;
632 	int_pkt->int_desc = hid->desc;
633 
634 	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
635 	    int_pkt, sizeof(*int_pkt), 0);
636 
637 	free(hid, M_DEVBUF);
638 }
639 
/*
 * Detach and free one child device (on ejection or when a bus-relations
 * update reports it missing).  Order matters: delete the Newbus child
 * under the bus topology lock, unlink from hbus->children, release all
 * interrupt descriptors, and only then free 'hpdev'.
 */
static void
hv_pci_delete_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct hv_irq_desc *hid, *tmp_hid;
	device_t pci_dev;
	int devfn;

	devfn = wslot_to_devfn(hpdev->desc.wslot.val);

	bus_topo_lock();

	/* Bus number is always 0 on this synthetic bus. */
	pci_dev = pci_find_dbsf(hbus->pci_domain,
	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
	if (pci_dev)
		device_delete_child(hbus->pci_bus, pci_dev);

	bus_topo_unlock();

	mtx_lock(&hbus->device_list_lock);
	TAILQ_REMOVE(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);

	/* Tell the host to delete each interrupt mapping and free it. */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
		hv_int_desc_free(hpdev, hid);

	free(hpdev, M_DEVBUF);
}
668 
/*
 * Allocate tracking state for a newly-reported child device and query
 * the host for its resource requirements (probed BAR values).
 *
 * Returns the new hv_pci_dev, inserted into hbus->children, or NULL if
 * the request could not be sent or the device was revoked while
 * waiting (in which case hpdev is freed).
 */
static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_child_message)];
	} ctxt;
	int ret;

	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
	hpdev->hbus = hbus;

	TAILQ_INIT(&hpdev->irq_desc_list);

	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;

	/* q_resource_requirements() fills hpdev->probed_bar[] on reply. */
	ctxt.pkt.compl_ctxt = &comp_pkt;
	ctxt.pkt.completion_func = q_resource_requirements;

	res_req = (struct pci_child_message *)&ctxt.pkt.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.val = desc->wslot.val;

	ret = vmbus_chan_send(hbus->sc->chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto err;

	/* Returns non-zero (ENODEV) if the channel was revoked. */
	if (wait_for_response(hbus, &comp_pkt.host_event))
		goto err;

	free_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;

	/*
	 * The first child to arrive establishes the PCI domain, taken
	 * from the low 16 bits of its serial number.
	 */
	mtx_lock(&hbus->device_list_lock);
	if (TAILQ_EMPTY(&hbus->children))
		hbus->pci_domain = desc->ser & 0xFFFF;
	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);
	return (hpdev);
err:
	/*
	 * NOTE(review): if the send succeeded but the wait bailed out on
	 * revocation, a late host response would reference this stack
	 * context — presumably prevented by the channel being revoked;
	 * verify.
	 */
	free_completion(&comp_pkt.host_event);
	free(hpdev, M_DEVBUF);
	return (NULL);
}
720 
721 #if __FreeBSD_version < 1100000
722 
723 /* Old versions don't have BUS_RESCAN(). Let's copy it from FreeBSD 11. */
724 
725 static struct pci_devinfo *
726 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
727     int slot, int func, size_t dinfo_size)
728 {
729 	struct pci_devinfo *dinfo;
730 
731 	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
732 	if (dinfo != NULL)
733 		pci_add_child(dev, dinfo);
734 
735 	return (dinfo);
736 }
737 
/*
 * Rescan the PCI bus 'dev': add device_t children for newly-appeared
 * functions, delete children whose functions no longer respond, and
 * probe/attach the additions.  Copied from FreeBSD 11's BUS_RESCAN
 * implementation for pre-1100000 kernels.
 */
static int
pci_rescan(device_t dev)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_softc *sc;
	device_t child, *devlist, *unchanged;
	int devcount, error, i, j, maxslots, oldcount;
	int busno, domain, s, f, pcifunchigh;
	uint8_t hdrtype;

	/* No need to check for ARI on a rescan. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error)
		return (error);
	/* unchanged[i] != NULL marks devlist[i] as still present. */
	if (devcount != 0) {
		unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
		    M_NOWAIT | M_ZERO);
		if (unchanged == NULL) {
			free(devlist, M_TEMP);
			return (ENOMEM);
		}
	} else
		unchanged = NULL;

	/* NOTE(review): 'sc' is assigned but never read in this copy. */
	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* If function 0 is not present, skip to the next slot. */
		f = 0;
		if (REG(PCIR_VENDOR, 2) == 0xffff)
			continue;
		pcifunchigh = 0;
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = 0; f <= pcifunchigh; f++) {
			if (REG(PCIR_VENDOR, 2) == 0xffff)
				continue;

			/*
			 * Found a valid function.  Check if a
			 * device_t for this device already exists.
			 */
			for (i = 0; i < devcount; i++) {
				child = devlist[i];
				if (child == NULL)
					continue;
				if (pci_get_slot(child) == s &&
				    pci_get_function(child) == f) {
					unchanged[i] = child;
					goto next_func;
				}
			}

			pci_identify_function(pcib, dev, domain, busno, s, f,
			    sizeof(struct pci_devinfo));
		next_func:;
		}
	}

	/* Remove devices that are no longer present. */
	for (i = 0; i < devcount; i++) {
		if (unchanged[i] != NULL)
			continue;
		device_delete_child(dev, devlist[i]);
	}

	free(devlist, M_TEMP);
	oldcount = devcount;

	/* Try to attach the devices just added. */
	error = device_get_children(dev, &devlist, &devcount);
	if (error) {
		free(unchanged, M_TEMP);
		return (error);
	}

	/* Anything not in 'unchanged' is a newly-identified child. */
	for (i = 0; i < devcount; i++) {
		for (j = 0; j < oldcount; j++) {
			if (devlist[i] == unchanged[j])
				goto next_device;
		}

		device_probe_and_attach(devlist[i]);
	next_device:;
	}

	free(unchanged, M_TEMP);
	free(devlist, M_TEMP);
	return (0);
#undef REG
}
835 
836 #else
837 
/* Modern kernels: delegate to the bus's BUS_RESCAN method. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}
843 
844 #endif
845 
/*
 * Taskqueue handler that reconciles the child list against the most
 * recent "device relations" snapshot queued on hbus->dr_list: new
 * devices get hv_pci_dev records, vanished devices are deleted, and
 * the PCI bus is rescanned if anything was added.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct hv_pcidev_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	/* Nothing queued: an earlier invocation already drained the list. */
	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/*
	 * Remove missing device(s), if any.
	 * NOTE(review): children is traversed here without
	 * device_list_lock; hv_pci_delete_device() takes the lock only
	 * for the TAILQ_REMOVE — presumably safe because this task is
	 * the only mutator at this point; verify.
	 */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}
933 
934 static struct hv_pci_dev *
935 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
936 {
937 	struct hv_pci_dev *hpdev, *ret = NULL;
938 
939 	mtx_lock(&hbus->device_list_lock);
940 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
941 		if (hpdev->desc.wslot.val == wslot) {
942 			ret = hpdev;
943 			break;
944 		}
945 	}
946 	mtx_unlock(&hbus->device_list_lock);
947 
948 	return (ret);
949 }
950 
951 static void
952 hv_pci_devices_present(struct hv_pcibus *hbus,
953     struct pci_bus_relations *relations)
954 {
955 	struct hv_dr_state *dr;
956 	struct hv_dr_work *dr_wrk;
957 	unsigned long dr_size;
958 
959 	if (hbus->detaching && relations->device_count > 0)
960 		return;
961 
962 	dr_size = offsetof(struct hv_dr_state, func) +
963 	    (sizeof(struct pci_func_desc) * relations->device_count);
964 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
965 
966 	dr->device_count = relations->device_count;
967 	if (dr->device_count != 0)
968 		memcpy(dr->func, relations->func,
969 		    sizeof(struct hv_pcidev_desc) * dr->device_count);
970 
971 	mtx_lock(&hbus->device_list_lock);
972 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
973 	mtx_unlock(&hbus->device_list_lock);
974 
975 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
976 	dr_wrk->bus = hbus;
977 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
978 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
979 }
980 
981 static void
982 hv_pci_devices_present2(struct hv_pcibus *hbus,
983     struct pci_bus_relations2 *relations)
984 {
985 	struct hv_dr_state *dr;
986 	struct hv_dr_work *dr_wrk;
987 	unsigned long dr_size;
988 
989 	if (hbus->detaching && relations->device_count > 0)
990 		return;
991 
992 	dr_size = offsetof(struct hv_dr_state, func) +
993 	    (sizeof(struct pci_func_desc2) * relations->device_count);
994 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
995 
996 	dr->device_count = relations->device_count;
997 	if (dr->device_count != 0)
998 		memcpy(dr->func, relations->func,
999 		    sizeof(struct pci_func_desc2) * dr->device_count);
1000 
1001 	mtx_lock(&hbus->device_list_lock);
1002 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
1003 	mtx_unlock(&hbus->device_list_lock);
1004 
1005 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
1006 	dr_wrk->bus = hbus;
1007 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
1008 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
1009 }
1010 
/*
 * Taskqueue handler for a host-initiated ejection: tear down the child
 * and acknowledge the ejection with PCI_EJECTION_COMPLETE.
 */
static void
hv_eject_device_work(void *arg, int pending __unused)
{
	struct hv_pci_dev *hpdev = arg;
	/* Capture wslot/hbus now: hv_pci_delete_device() frees hpdev. */
	union win_slot_encoding wslot = hpdev->desc.wslot;
	struct hv_pcibus *hbus = hpdev->hbus;
	struct pci_eject_response *eject_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hv_pci_delete_device(hpdev);

	/* Fire-and-forget acknowledgement; no completion requested. */
	memset(&ctxt, 0, sizeof(ctxt));
	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	eject_pkt->wslot.val = wslot.val;
	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    eject_pkt, sizeof(*eject_pkt), 0);
}
1032 
/*
 * Schedule ejection of a child device on the channel's management
 * taskqueue (no-op if the bridge itself is detaching).
 */
static void
hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct taskqueue *taskq;

	if (hbus->detaching)
		return;

	/*
	 * Push this task into the same taskqueue on which
	 * vmbus_pcib_attach() runs, so we're sure this task can't run
	 * concurrently with vmbus_pcib_attach().
	 */
	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
	taskqueue_enqueue(taskq, &hpdev->eject_task);
}
1051 
1052 #define PCIB_PACKET_SIZE	0x100
1053 
1054 static void
1055 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
1056 {
1057 	struct vmbus_pcib_softc *sc = arg;
1058 	struct hv_pcibus *hbus = sc->hbus;
1059 
1060 	void *buffer;
1061 	int bufferlen = PCIB_PACKET_SIZE;
1062 
1063 	struct pci_packet *comp_packet;
1064 	struct pci_response *response;
1065 	struct pci_incoming_message *new_msg;
1066 	struct pci_bus_relations *bus_rel;
1067 	struct pci_bus_relations2 *bus_rel2;
1068 	struct pci_dev_incoming *dev_msg;
1069 	struct hv_pci_dev *hpdev;
1070 
1071 	buffer = sc->rx_buf;
1072 	do {
1073 		struct vmbus_chanpkt_hdr *pkt = buffer;
1074 		uint32_t bytes_rxed;
1075 		int ret;
1076 
1077 		bytes_rxed = bufferlen;
1078 		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
1079 
1080 		if (ret == ENOBUFS) {
1081 			/* Handle large packet */
1082 			if (bufferlen > PCIB_PACKET_SIZE) {
1083 				free(buffer, M_DEVBUF);
1084 				buffer = NULL;
1085 			}
1086 
1087 			/* alloc new buffer */
1088 			buffer =
1089 			    malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
1090 			bufferlen = bytes_rxed;
1091 
1092 			continue;
1093 		}
1094 
1095 		if (ret != 0) {
1096 			/* ignore EIO or EAGAIN */
1097 			break;
1098 		}
1099 
1100 		if (bytes_rxed <= sizeof(struct pci_response))
1101 			continue;
1102 
1103 		switch (pkt->cph_type) {
1104 		case VMBUS_CHANPKT_TYPE_COMP:
1105 			comp_packet =
1106 			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
1107 			response = (struct pci_response *)pkt;
1108 			comp_packet->completion_func(comp_packet->compl_ctxt,
1109 			    response, bytes_rxed);
1110 			break;
1111 		case VMBUS_CHANPKT_TYPE_INBAND:
1112 			new_msg = (struct pci_incoming_message *)buffer;
1113 
1114 			switch (new_msg->message_type.type) {
1115 			case PCI_BUS_RELATIONS:
1116 				bus_rel = (struct pci_bus_relations *)buffer;
1117 
1118 				if (bus_rel->device_count == 0)
1119 					break;
1120 
1121 				if (bytes_rxed <
1122 				    offsetof(struct pci_bus_relations, func) +
1123 				        (sizeof(struct pci_func_desc) *
1124 				            (bus_rel->device_count)))
1125 					break;
1126 
1127 				hv_pci_devices_present(hbus, bus_rel);
1128 				break;
1129 
1130 			case PCI_BUS_RELATIONS2:
1131 				bus_rel2 = (struct pci_bus_relations2 *)buffer;
1132 
1133 				if (bus_rel2->device_count == 0)
1134 					break;
1135 
1136 				if (bytes_rxed <
1137 				    offsetof(struct pci_bus_relations2, func) +
1138 				    (sizeof(struct pci_func_desc2) *
1139 				    (bus_rel2->device_count)))
1140 					break;
1141 
1142 				hv_pci_devices_present2(hbus, bus_rel2);
1143 
1144 			case PCI_EJECT:
1145 				dev_msg = (struct pci_dev_incoming *)buffer;
1146 				hpdev = get_pcichild_wslot(hbus,
1147 				    dev_msg->wslot.val);
1148 
1149 				if (hpdev)
1150 					hv_pci_eject_device(hpdev);
1151 
1152 				break;
1153 			default:
1154 				printf("vmbus_pcib: Unknown msg type 0x%x\n",
1155 				    new_msg->message_type.type);
1156 				break;
1157 			}
1158 			break;
1159 		default:
1160 			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1161 			    pkt->cph_type);
1162 			break;
1163 		}
1164 	} while (1);
1165 
1166 	if (bufferlen > PCIB_PACKET_SIZE)
1167 		free(buffer, M_DEVBUF);
1168 }
1169 
1170 static int
1171 hv_pci_protocol_negotiation(struct hv_pcibus *hbus,
1172     enum pci_protocol_version_t version[],
1173     int num_version)
1174 {
1175 	struct pci_version_request *version_req;
1176 	struct hv_pci_compl comp_pkt;
1177 	struct {
1178 		struct pci_packet pkt;
1179 		uint8_t buffer[sizeof(struct pci_version_request)];
1180 	} ctxt;
1181 	int ret;
1182 	int i;
1183 
1184 	init_completion(&comp_pkt.host_event);
1185 
1186 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1187 	ctxt.pkt.compl_ctxt = &comp_pkt;
1188 	version_req = (struct pci_version_request *)&ctxt.pkt.message;
1189 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1190 
1191 	for(i=0; i< num_version; i++) {
1192 		version_req->protocol_version = version[i];
1193 		ret = vmbus_chan_send(hbus->sc->chan,
1194 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1195 		    version_req, sizeof(*version_req),
1196 		    (uint64_t)(uintptr_t)&ctxt.pkt);
1197 		if (!ret)
1198 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1199 
1200 		if (ret) {
1201 			device_printf(hbus->pcib,
1202 				"vmbus_pcib failed to request version: %d\n",
1203 				ret);
1204 			goto out;
1205 		}
1206 
1207 		if (comp_pkt.completion_status >= 0) {
1208 			hbus->protocol_version = version[i];
1209 			device_printf(hbus->pcib,
1210 				"PCI VMBus using version 0x%x\n",
1211 				hbus->protocol_version);
1212 			ret = 0;
1213 			goto out;
1214 		}
1215 
1216 		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
1217 			device_printf(hbus->pcib,
1218 				"vmbus_pcib version negotiation failed: %x\n",
1219 				comp_pkt.completion_status);
1220 			ret = EPROTO;
1221 			goto out;
1222 		}
1223 		reinit_completion(&comp_pkt.host_event);
1224 	}
1225 
1226 	device_printf(hbus->pcib,
1227 		"PCI pass-trhpugh VSP failed to find supported version\n");
1228 out:
1229 	free_completion(&comp_pkt.host_event);
1230 	return (ret);
1231 }
1232 
1233 /* Ask the host to send along the list of child devices */
1234 static int
1235 hv_pci_query_relations(struct hv_pcibus *hbus)
1236 {
1237 	struct pci_message message;
1238 	int ret;
1239 
1240 	message.type = PCI_QUERY_BUS_RELATIONS;
1241 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1242 	    &message, sizeof(message), 0);
1243 	return (ret);
1244 }
1245 
1246 static int
1247 hv_pci_enter_d0(struct hv_pcibus *hbus)
1248 {
1249 	struct pci_bus_d0_entry *d0_entry;
1250 	struct hv_pci_compl comp_pkt;
1251 	struct {
1252 		struct pci_packet pkt;
1253 		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1254 	} ctxt;
1255 	int ret;
1256 
1257 	/*
1258 	 * Tell the host that the bus is ready to use, and moved into the
1259 	 * powered-on state.  This includes telling the host which region
1260 	 * of memory-mapped I/O space has been chosen for configuration space
1261 	 * access.
1262 	 */
1263 	init_completion(&comp_pkt.host_event);
1264 
1265 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1266 	ctxt.pkt.compl_ctxt = &comp_pkt;
1267 
1268 	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1269 	memset(d0_entry, 0, sizeof(*d0_entry));
1270 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1271 	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1272 
1273 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1274 	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1275 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1276 	if (!ret)
1277 		ret = wait_for_response(hbus, &comp_pkt.host_event);
1278 
1279 	if (ret)
1280 		goto out;
1281 
1282 	if (comp_pkt.completion_status < 0) {
1283 		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1284 		ret = EPROTO;
1285 	} else {
1286 		ret = 0;
1287 	}
1288 
1289 out:
1290 	free_completion(&comp_pkt.host_event);
1291 	return (ret);
1292 }
1293 
1294 /*
1295  * It looks this is only needed by Windows VM, but let's send the message too
1296  * just to make the host happy.
1297  */
/*
 * Send a PCI_RESOURCES_ASSIGNED (or, for protocol 1.4+,
 * PCI_RESOURCES_ASSIGNED2) message for every child device and wait for
 * each host completion.  Returns 0 on success or an errno value; stops
 * at the first failure.
 */
static int
hv_send_resources_allocated(struct hv_pcibus *hbus)
{
	struct pci_resources_assigned *res_assigned;
	struct pci_resources_assigned2 *res_assigned2;
	struct hv_pci_compl comp_pkt;
	struct hv_pci_dev *hpdev;
	struct pci_packet *pkt;
	uint32_t wslot;
	int ret = 0;
	size_t size_res;

	/* Message size depends on the negotiated protocol version. */
	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4)
			? sizeof(*res_assigned) : sizeof(*res_assigned2);
	pkt = malloc(sizeof(*pkt) + size_res,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* Walk all 256 possible wslots; skip the empty ones. */
	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		/* A fresh completion is armed for each message. */
		init_completion(&comp_pkt.host_event);

		/* The packet buffer is reused across iterations. */
		memset(pkt, 0, sizeof(*pkt) + size_res);
		pkt->completion_func = hv_pci_generic_compl;
		pkt->compl_ctxt = &comp_pkt;

		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) {
			res_assigned =
			    (struct pci_resources_assigned *)&pkt->message;
			res_assigned->message_type.type =
			    PCI_RESOURCES_ASSIGNED;
			res_assigned->wslot.val = hpdev->desc.wslot.val;
		} else {
			res_assigned2 =
			    (struct pci_resources_assigned2 *)&pkt->message;
			res_assigned2->message_type.type =
			    PCI_RESOURCES_ASSIGNED2;
			res_assigned2->wslot.val = hpdev->desc.wslot.val;
		}

		ret = vmbus_chan_send(hbus->sc->chan,
		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
		    &pkt->message, size_res,
		    (uint64_t)(uintptr_t)pkt);
		if (!ret)
			ret = wait_for_response(hbus, &comp_pkt.host_event);

		/* Completion must be torn down before the next iteration. */
		free_completion(&comp_pkt.host_event);

		if (ret)
			break;

		if (comp_pkt.completion_status < 0) {
			ret = EPROTO;
			device_printf(hbus->pcib,
			    "failed to send PCI_RESOURCES_ASSIGNED\n");
			break;
		}
	}

	free(pkt, M_DEVBUF);
	return (ret);
}
1363 
1364 static int
1365 hv_send_resources_released(struct hv_pcibus *hbus)
1366 {
1367 	struct pci_child_message pkt;
1368 	struct hv_pci_dev *hpdev;
1369 	uint32_t wslot;
1370 	int ret;
1371 
1372 	for (wslot = 0; wslot < 256; wslot++) {
1373 		hpdev = get_pcichild_wslot(hbus, wslot);
1374 		if (!hpdev)
1375 			continue;
1376 
1377 		pkt.message_type.type = PCI_RESOURCES_RELEASED;
1378 		pkt.wslot.val = hpdev->desc.wslot.val;
1379 
1380 		ret = vmbus_chan_send(hbus->sc->chan,
1381 		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1382 		if (ret)
1383 			return (ret);
1384 	}
1385 
1386 	return (0);
1387 }
1388 
/*
 * Generate thin accessors for the config MMIO window:
 * hv_cfg_read_{1,2,4}() and hv_cfg_write_{1,2,4}() wrap
 * bus_read_N()/bus_write_N() on bus->cfg_res.
 */
#define hv_cfg_read(x, s)						\
static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
    bus_size_t offset)							\
{									\
	return (bus_read_##s(bus->cfg_res, offset));			\
}

#define hv_cfg_write(x, s)						\
static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
    bus_size_t offset, uint##x##_t val)					\
{									\
	return (bus_write_##s(bus->cfg_res, offset, val));		\
}

/* Instantiate the 8-, 16- and 32-bit variants. */
hv_cfg_read(8, 1)
hv_cfg_read(16, 2)
hv_cfg_read(32, 4)

hv_cfg_write(8, 1)
hv_cfg_write(16, 2)
hv_cfg_write(32, 4)
1410 
/*
 * Read 'size' bytes of config space at offset 'where' of the given
 * child device into '*val'.
 *
 * The vendor/device IDs, revision/class and subsystem IDs are served
 * from the cached device descriptor; ROM BAR and INTLINE/INTPIN reads
 * return 0.  All other in-range offsets go to the hardware config
 * window under hbus->config_lock.
 */
static void
_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t *val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
	 */
	if (where + size <= PCIR_COMMAND) {
		/* Vendor/device ID: served from the cached descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCIR_REVID && where + size <=
		   PCIR_CACHELNSZ) {
		/* Revision ID / class code: also from the descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
		       PCIR_REVID, size);
	} else if (where >= PCIR_SUBVEND_0 && where + size <=
		   PCIR_BIOS) {
		/* Subsystem vendor/device ID: from the descriptor. */
		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
		       PCIR_SUBVEND_0, size);
	} else if (where >= PCIR_BIOS && where + size <=
		   PCIR_CAP_PTR) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if ((where >= PCIR_INTLINE && where + size <=
		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		/* Serialize the function-select write and the data access. */
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be read. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start reading.*/
		mb();

		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
			break;
		case 2:
			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
			break;
		default:
			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config read: it's unlikely to reach here. */
		memset(val, 0, size);
	}
}
1476 
/*
 * Write 'size' bytes of config space at offset 'where' of the given
 * child device.
 *
 * Writes to the read-only ranges (subsystem IDs through ROM BAR) and to
 * anything outside [PCIR_COMMAND, CFG_PAGE_SIZE) are silently dropped;
 * everything else goes to the hardware config window under
 * hbus->config_lock.
 */
static void
_hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/* SSIDs and ROM BARs are read-only */
	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
		return;

	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
		/* Serialize the function-select write and the data access. */
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be written. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start writing.*/
		wmb();

		/* Write to that function's config space. */
		switch (size) {
		case 1:
			hv_cfg_write_1(hbus, addr, (uint8_t)val);
			break;
		case 2:
			hv_cfg_write_2(hbus, addr, (uint16_t)val);
			break;
		default:
			hv_cfg_write_4(hbus, addr, (uint32_t)val);
			break;
		}

		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config write: it's unlikely to reach here. */
		return;
	}
}
1522 
1523 /*
1524  * The vPCI in some Hyper-V releases do not initialize the last 4
1525  * bit of BAR registers. This could result weird problems causing PCI
1526  * code fail to configure BAR correctly.
1527  *
1528  * Just write all 1's to those BARs whose probed values are not zero.
1529  * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1530  * correctly.
1531  */
1532 
1533 static void
1534 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1535 {
1536 	struct hv_pci_dev *hpdev;
1537 	int i;
1538 
1539 	mtx_lock(&hbus->device_list_lock);
1540 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
1541 		for (i = 0; i < 6; i++) {
1542 			/* Ignore empty bar */
1543 			if (hpdev->probed_bar[i] == 0)
1544 				continue;
1545 
1546 			uint32_t bar_val = 0;
1547 
1548 			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1549 			    4, &bar_val);
1550 
1551 			if (hpdev->probed_bar[i] != bar_val) {
1552 				if (bootverbose)
1553 					printf("vmbus_pcib: initialize bar %d "
1554 					    "by writing all 1s\n", i);
1555 
1556 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1557 				    4, 0xffffffff);
1558 
1559 				/* Now write the original value back */
1560 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1561 				    4, bar_val);
1562 			}
1563 		}
1564 	}
1565 	mtx_unlock(&hbus->device_list_lock);
1566 }
1567 
/* Task callback: atomically mark the bus as detaching. */
static void
vmbus_pcib_set_detaching(void *arg, int pending __unused)
{
	struct hv_pcibus *hbus = arg;

	atomic_set_int(&hbus->detaching, 1);
}
1575 
/*
 * Quiesce the bus before detach: set the detaching flag from within the
 * channel's task context, then drain all work already queued on
 * sc->taskq.
 */
static void
vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
{
	struct task task;

	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);

	/*
	 * Make sure the channel callback won't push any possible new
	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
	 */
	vmbus_chan_run_task(hbus->sc->chan, &task);

	taskqueue_drain_all(hbus->sc->taskq);
}
1591 
1592 
1593 /*
1594  * Standard probe entry point.
1595  *
1596  */
1597 static int
1598 vmbus_pcib_probe(device_t dev)
1599 {
1600 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1601 	    &g_pass_through_dev_type) == 0) {
1602 		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1603 		return (BUS_PROBE_DEFAULT);
1604 	}
1605 	return (ENXIO);
1606 }
1607 
1608 /*
1609  * Standard attach entry point.
1610  *
1611  */
1612 static int
1613 vmbus_pcib_attach(device_t dev)
1614 {
1615 	const int pci_ring_size = (4 * PAGE_SIZE);
1616 	const struct hyperv_guid *inst_guid;
1617 	struct vmbus_channel *channel;
1618 	struct vmbus_pcib_softc *sc;
1619 	struct hv_pcibus *hbus;
1620 	int rid = 0;
1621 	int ret;
1622 
1623 	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
1624 	hbus->pcib = dev;
1625 
1626 	channel = vmbus_get_channel(dev);
1627 	inst_guid = vmbus_chan_guid_inst(channel);
1628 	hbus->pci_domain = inst_guid->hv_guid[9] |
1629 			  (inst_guid->hv_guid[8] << 8);
1630 
1631 	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
1632 	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
1633 	TAILQ_INIT(&hbus->children);
1634 	TAILQ_INIT(&hbus->dr_list);
1635 
1636 	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
1637 	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
1638 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1639 
1640 	if (!hbus->cfg_res) {
1641 		device_printf(dev, "failed to get resource for cfg window\n");
1642 		ret = ENXIO;
1643 		goto free_bus;
1644 	}
1645 
1646 	sc = device_get_softc(dev);
1647 	sc->chan = channel;
1648 	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1649 	sc->hbus = hbus;
1650 
1651 	/*
1652 	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
1653 	 * messages. NB: we can't handle the messages in the channel callback
1654 	 * directly, because the message handlers need to send new messages
1655 	 * to the host and waits for the host's completion messages, which
1656 	 * must also be handled by the channel callback.
1657 	 */
1658 	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
1659 	    taskqueue_thread_enqueue, &sc->taskq);
1660 	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
1661 
1662 	hbus->sc = sc;
1663 
1664 	init_completion(&hbus->query_completion);
1665 	hbus->query_comp = &hbus->query_completion;
1666 
1667 	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
1668 		NULL, 0, vmbus_pcib_on_channel_callback, sc);
1669 	if (ret)
1670 		goto free_res;
1671 
1672 	ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions,
1673 	    ARRAY_SIZE(pci_protocol_versions));
1674 	if (ret)
1675 		goto vmbus_close;
1676 
1677 	ret = hv_pci_query_relations(hbus);
1678 	if (!ret)
1679 		ret = wait_for_response(hbus, hbus->query_comp);
1680 
1681 	if (ret)
1682 		goto vmbus_close;
1683 
1684 	ret = hv_pci_enter_d0(hbus);
1685 	if (ret)
1686 		goto vmbus_close;
1687 
1688 	ret = hv_send_resources_allocated(hbus);
1689 	if (ret)
1690 		goto vmbus_close;
1691 
1692 	vmbus_pcib_prepopulate_bars(hbus);
1693 
1694 	hbus->pci_bus = device_add_child(dev, "pci", -1);
1695 	if (!hbus->pci_bus) {
1696 		device_printf(dev, "failed to create pci bus\n");
1697 		ret = ENXIO;
1698 		goto vmbus_close;
1699 	}
1700 
1701 	bus_generic_attach(dev);
1702 
1703 	hbus->state = hv_pcibus_installed;
1704 
1705 	return (0);
1706 
1707 vmbus_close:
1708 	vmbus_pcib_pre_detach(hbus);
1709 	vmbus_chan_close(sc->chan);
1710 free_res:
1711 	taskqueue_free(sc->taskq);
1712 	free_completion(&hbus->query_completion);
1713 	free(sc->rx_buf, M_DEVBUF);
1714 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1715 free_bus:
1716 	mtx_destroy(&hbus->device_list_lock);
1717 	mtx_destroy(&hbus->config_lock);
1718 	free(hbus, M_DEVBUF);
1719 	return (ret);
1720 }
1721 
1722 /*
1723  * Standard detach entry point
1724  */
1725 static int
1726 vmbus_pcib_detach(device_t dev)
1727 {
1728 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1729 	struct hv_pcibus *hbus = sc->hbus;
1730 	struct pci_message teardown_packet;
1731 	struct pci_bus_relations relations;
1732 	int ret;
1733 
1734 	vmbus_pcib_pre_detach(hbus);
1735 
1736 	if (hbus->state == hv_pcibus_installed)
1737 		bus_generic_detach(dev);
1738 
1739 	/* Delete any children which might still exist. */
1740 	memset(&relations, 0, sizeof(relations));
1741 	hv_pci_devices_present(hbus, &relations);
1742 
1743 	ret = hv_send_resources_released(hbus);
1744 	if (ret)
1745 		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1746 
1747 	teardown_packet.type = PCI_BUS_D0EXIT;
1748 	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1749 	    &teardown_packet, sizeof(struct pci_message), 0);
1750 	if (ret)
1751 		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1752 
1753 	taskqueue_drain_all(hbus->sc->taskq);
1754 	vmbus_chan_close(sc->chan);
1755 	taskqueue_free(sc->taskq);
1756 
1757 	free_completion(&hbus->query_completion);
1758 	free(sc->rx_buf, M_DEVBUF);
1759 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1760 
1761 	mtx_destroy(&hbus->device_list_lock);
1762 	mtx_destroy(&hbus->config_lock);
1763 	free(hbus, M_DEVBUF);
1764 
1765 	return (0);
1766 }
1767 
1768 static int
1769 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1770 {
1771 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1772 
1773 	switch (which) {
1774 	case PCIB_IVAR_DOMAIN:
1775 		*val = sc->hbus->pci_domain;
1776 		return (0);
1777 
1778 	case PCIB_IVAR_BUS:
1779 		/* There is only bus 0. */
1780 		*val = 0;
1781 		return (0);
1782 	}
1783 	return (ENOENT);
1784 }
1785 
/* No bus ivars are writable on this bridge. */
static int
vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
{
	return (ENOENT);
}
1791 
1792 static struct resource *
1793 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
1794 	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1795 {
1796 	unsigned int bar_no;
1797 	struct hv_pci_dev *hpdev;
1798 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1799 	struct resource *res;
1800 	unsigned int devfn;
1801 
1802 	if (type == PCI_RES_BUS)
1803 		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
1804 		    start, end, count, flags));
1805 
1806 	/* Devices with port I/O BAR are not supported. */
1807 	if (type == SYS_RES_IOPORT)
1808 		return (NULL);
1809 
1810 	if (type == SYS_RES_MEMORY) {
1811 		devfn = PCI_DEVFN(pci_get_slot(child),
1812 		    pci_get_function(child));
1813 		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1814 		if (!hpdev)
1815 			return (NULL);
1816 
1817 		bar_no = PCI_RID2BAR(*rid);
1818 		if (bar_no >= MAX_NUM_BARS)
1819 			return (NULL);
1820 
1821 		/* Make sure a 32-bit BAR gets a 32-bit address */
1822 		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
1823 			end = ulmin(end, 0xFFFFFFFF);
1824 	}
1825 
1826 	res = bus_generic_alloc_resource(dev, child, type, rid,
1827 		start, end, count, flags);
1828 	/*
1829 	 * If this is a request for a specific range, assume it is
1830 	 * correct and pass it up to the parent.
1831 	 */
1832 	if (res == NULL && start + count - 1 == end)
1833 		res = bus_generic_alloc_resource(dev, child, type, rid,
1834 		    start, end, count, flags);
1835 	if (res) {
1836 		device_printf(dev,"vmbus_pcib_alloc_resource is successful\n");
1837 	}
1838 	return (res);
1839 }
1840 
1841 static int
1842 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
1843     struct resource *r)
1844 {
1845 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1846 
1847 	if (type == PCI_RES_BUS)
1848 		return (pci_domain_release_bus(sc->hbus->pci_domain, child,
1849 		    rid, r));
1850 
1851 	if (type == SYS_RES_IOPORT)
1852 		return (EINVAL);
1853 
1854 	return (bus_generic_release_resource(dev, child, type, rid, r));
1855 }
1856 
#if __FreeBSD_version >= 1100000
/* Delegate CPU-set queries to our own parent bus. */
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	return (bus_get_cpus(pcib, op, setsize, cpuset));
}
#endif
1865 
1866 static uint32_t
1867 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1868     u_int reg, int bytes)
1869 {
1870 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1871 	struct hv_pci_dev *hpdev;
1872 	unsigned int devfn = PCI_DEVFN(slot, func);
1873 	uint32_t data = 0;
1874 
1875 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1876 
1877 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1878 	if (!hpdev)
1879 		return (~0);
1880 
1881 	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
1882 
1883 	return (data);
1884 }
1885 
1886 static void
1887 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1888     u_int reg, uint32_t data, int bytes)
1889 {
1890 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1891 	struct hv_pci_dev *hpdev;
1892 	unsigned int devfn = PCI_DEVFN(slot, func);
1893 
1894 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1895 
1896 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1897 	if (!hpdev)
1898 		return;
1899 
1900 	_hv_pcifront_write_config(hpdev, reg, bytes, data);
1901 }
1902 
/* INTx routing is unsupported: every request yields an invalid IRQ. */
static int
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
{
	/* We only support MSI/MSI-X and don't support INTx interrupt. */
	return (PCI_INVALID_IRQ);
}
1909 
/*
 * Allocate 'count' MSI vectors for 'dev': delegated to the parent pcib
 * on x86, and to the INTRNG MSI interface on arm64.
 */
static int
vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
    int maxcount, int *irqs)
{
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
	    irqs));
#endif
#if defined(__aarch64__)
	return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount,
	    irqs));
#endif
}
1923 
/*
 * Release MSI vectors previously obtained from vmbus_pcib_alloc_msi():
 * parent pcib on x86, INTRNG on arm64.
 */
static int
vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
#endif
#if defined(__aarch64__)
	return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs));
#endif
}
1934 
1935 static int
1936 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1937 {
1938 #if defined(__aarch64__)
1939 	int ret;
1940 #if defined(INTRNG)
1941 	ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq);
1942 	return ret;
1943 #else
1944     return (ENXIO);
1945 #endif
1946 #else
1947 	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1948 #endif /* __aarch64__ */
1949 }
1950 
/* Release an MSI-X vector: INTRNG on arm64, parent pcib elsewhere. */
static int
vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
{
#if defined(__aarch64__)
	return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq));
#else
	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
#endif /* __aarch64__ */
}
1960 
/*
 * MSI address/data field constants used below to pick apart the
 * parent-provided MSI encoding.  On x86 these mirror the I/O APIC
 * layout (destination field in the address, vector and fixed delivery
 * mode in the data); the arm64 values are zero since the whole data
 * word is used as the vector there.
 */
#if defined(__aarch64__)
#define	MSI_INTEL_ADDR_DEST	0x00000000
#define	MSI_INTEL_DATA_DELFIXED 0x0
#endif
#if defined(__amd64__) || defined(__i386__)
#define MSI_INTEL_ADDR_DEST 0x000ff000
#define MSI_INTEL_DATA_INTVEC   IOART_INTVEC    /* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
#endif
1970 
/*
 * pcib interface: compose the MSI address/data pair for 'irq' on behalf
 * of 'child'.
 *
 * The parent's MSI mapping supplies a vector and target CPU; those are
 * forwarded to the host in a CREATE_INTERRUPT message, and the
 * address/data pair the host returns is handed back to the caller.
 * Any previous descriptor recorded for the same irq is released first.
 */
static int
vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
    uint64_t *addr, uint32_t *data)
{
	unsigned int devfn;
	struct hv_pci_dev *hpdev;

	uint64_t v_addr;
	uint32_t v_data;
	struct hv_irq_desc *hid, *tmp_hid;
	unsigned int cpu, vcpu_id;
	unsigned int vector;

	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
	struct compose_comp_ctxt comp;
	struct {
		struct pci_packet pkt;
		union {
			struct pci_create_interrupt v1;
			struct pci_create_interrupt3 v3;
		}int_pkts;
	} ctxt;
	int ret;
	uint32_t size;

	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return (ENOENT);
	/* Let the platform compose its own view of the MSI first. */
#if defined(__aarch64__)
	ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq,
	    &v_addr, &v_data);
#else
	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
            &v_addr, &v_data);
#endif
	if (ret)
		return (ret);

	/* Drop any stale descriptor already recorded for this irq. */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
		if (hid->irq == irq) {
			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
			hv_int_desc_free(hpdev, hid);
			break;
		}
	}

#if defined(__aarch64__)
	cpu = 0;
	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
	vector = v_data;
#else
	/* Extract the target CPU and vector from the x86 MSI encoding. */
	cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
	vector = v_data & MSI_INTEL_DATA_INTVEC;
#endif

	init_completion(&comp.comp_pkt.host_event);

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.pkt.completion_func = hv_pci_compose_compl;
	ctxt.pkt.compl_ctxt = &comp;
	/*
	 * NOTE(review): no default case below -- 'size' would be used
	 * uninitialized for a protocol version other than 1.1/1.4;
	 * confirm those are the only versions ever negotiated.
	 */
	switch (hpdev->hbus->protocol_version) {
	case PCI_PROTOCOL_VERSION_1_1:
		ctxt.int_pkts.v1.message_type.type =
		    PCI_CREATE_INTERRUPT_MESSAGE;
		ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val;
		ctxt.int_pkts.v1.int_desc.vector = vector;
		ctxt.int_pkts.v1.int_desc.vector_count = 1;
		ctxt.int_pkts.v1.int_desc.delivery_mode =
		    MSI_INTEL_DATA_DELFIXED;
		ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id;
		size = sizeof(ctxt.int_pkts.v1);
		break;

	case PCI_PROTOCOL_VERSION_1_4:
		ctxt.int_pkts.v3.message_type.type =
		    PCI_CREATE_INTERRUPT_MESSAGE3;
		ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val;
		ctxt.int_pkts.v3.int_desc.vector = vector;
		ctxt.int_pkts.v3.int_desc.vector_count = 1;
		ctxt.int_pkts.v3.int_desc.reserved = 0;
		ctxt.int_pkts.v3.int_desc.delivery_mode =
		    MSI_INTEL_DATA_DELFIXED;
		ctxt.int_pkts.v3.int_desc.processor_count = 1;
		ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id;
		size = sizeof(ctxt.int_pkts.v3);
		break;
	}
	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size,
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret) {
		free_completion(&comp.comp_pkt.host_event);
		return (ret);
	}

	/* Wait for the host to return the composed address/data pair. */
	wait_for_completion(&comp.comp_pkt.host_event);
	free_completion(&comp.comp_pkt.host_event);

	if (comp.comp_pkt.completion_status < 0) {
		device_printf(pcib,
		    "vmbus_pcib_map_msi completion_status %d\n",
		    comp.comp_pkt.completion_status);
		return (EPROTO);
	}

	*addr = comp.int_desc.address;
	*data = comp.int_desc.data;

	/* Record the descriptor so it can be freed on remap/eject. */
	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
	hid->irq = irq;
	hid->desc = comp.int_desc;
	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);

	return (0);
}
2088 
/* Dispatch table wiring this driver into newbus and the pcib interface. */
static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         vmbus_pcib_probe),
	DEVMETHOD(device_attach,        vmbus_pcib_attach),
	DEVMETHOD(device_detach,        vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource,   bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),
#endif

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
	DEVMETHOD(pcib_request_feature,		pcib_request_feature_allow),

	DEVMETHOD_END
};

DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
		sizeof(struct vmbus_pcib_softc));
/* Register under vmbus; this driver needs both vmbus and pci. */
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
2131 
2132 #endif /* NEW_PCIB */
2133