xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision af6a5351a1fdb1130f18be6c782c4d48916eb971)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
46 
47 #include <machine/bus.h>
48 #include <machine/intr_machdep.h>
49 #include <machine/resource.h>
50 #include <x86/include/apicvar.h>
51 
52 #include <contrib/dev/acpica/include/acpi.h>
53 #include <dev/acpica/acpivar.h>
54 
55 #include <dev/hyperv/include/hyperv.h>
56 #include <dev/hyperv/include/vmbus_xact.h>
57 #include <dev/hyperv/vmbus/hyperv_reg.h>
58 #include <dev/hyperv/vmbus/hyperv_var.h>
59 #include <dev/hyperv/vmbus/vmbus_reg.h>
60 #include <dev/hyperv/vmbus/vmbus_var.h>
61 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
62 
63 #include "acpi_if.h"
64 #include "pcib_if.h"
65 #include "vmbus_if.h"
66 
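/*
 * Starting value for GPADL (Guest Physical Address Descriptor List)
 * handles; see vmbus_gpadl_alloc().
 */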
67 #define VMBUS_GPADL_START		0xe1e10
68 
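/*
 * "Post message" hypercall context: wraps a vmbus_xact transaction and
 * keeps a copy of the hypercall input, so that the input can be restored
 * between retries (see vmbus_msghc_exec_noresult()).
 */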
69 struct vmbus_msghc {
70 	struct vmbus_xact		*mh_xact;
71 	struct hypercall_postmsg_in	mh_inprm_save;
72 };
73 
74 static int			vmbus_probe(device_t);
75 static int			vmbus_attach(device_t);
76 static int			vmbus_detach(device_t);
77 static int			vmbus_read_ivar(device_t, device_t, int,
78 				    uintptr_t *);
79 static int			vmbus_child_pnpinfo_str(device_t, device_t,
80 				    char *, size_t);
81 static struct resource		*vmbus_alloc_resource(device_t dev,
82 				    device_t child, int type, int *rid,
83 				    rman_res_t start, rman_res_t end,
84 				    rman_res_t count, u_int flags);
85 static int			vmbus_alloc_msi(device_t bus, device_t dev,
86 				    int count, int maxcount, int *irqs);
87 static int			vmbus_release_msi(device_t bus, device_t dev,
88 				    int count, int *irqs);
89 static int			vmbus_alloc_msix(device_t bus, device_t dev,
90 				    int *irq);
91 static int			vmbus_release_msix(device_t bus, device_t dev,
92 				    int irq);
93 static int			vmbus_map_msi(device_t bus, device_t dev,
94 				    int irq, uint64_t *addr, uint32_t *data);
95 static uint32_t			vmbus_get_version_method(device_t, device_t);
96 static int			vmbus_probe_guid_method(device_t, device_t,
97 				    const struct hyperv_guid *);
98 static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
99 				    device_t dev, int cpu);
100 static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
101 				    int);
102 #ifdef EARLY_AP_STARTUP
103 static void			vmbus_intrhook(void *);
104 #endif
105 
106 static int			vmbus_init(struct vmbus_softc *);
107 static int			vmbus_connect(struct vmbus_softc *, uint32_t);
108 static int			vmbus_req_channels(struct vmbus_softc *sc);
109 static void			vmbus_disconnect(struct vmbus_softc *);
110 static int			vmbus_scan(struct vmbus_softc *);
111 static void			vmbus_scan_teardown(struct vmbus_softc *);
112 static void			vmbus_scan_done(struct vmbus_softc *,
113 				    const struct vmbus_message *);
114 static void			vmbus_chanmsg_handle(struct vmbus_softc *,
115 				    const struct vmbus_message *);
116 static void			vmbus_msg_task(void *, int);
117 static void			vmbus_synic_setup(void *);
118 static void			vmbus_synic_teardown(void *);
119 static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
120 static int			vmbus_dma_alloc(struct vmbus_softc *);
121 static void			vmbus_dma_free(struct vmbus_softc *);
122 static int			vmbus_intr_setup(struct vmbus_softc *);
123 static void			vmbus_intr_teardown(struct vmbus_softc *);
124 static int			vmbus_doattach(struct vmbus_softc *);
125 static void			vmbus_event_proc_dummy(struct vmbus_softc *,
126 				    int);
127 
128 static struct vmbus_softc	*vmbus_sc;
129 
130 extern inthand_t IDTVEC(vmbus_isr);
131 
132 static const uint32_t		vmbus_version[] = {
133 	VMBUS_VERSION_WIN8_1,
134 	VMBUS_VERSION_WIN8,
135 	VMBUS_VERSION_WIN7,
136 	VMBUS_VERSION_WS2008
137 };
138 
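/*
 * Channel messages handled directly by this file.  CHOFFER_DONE signals
 * that the initial channel offers have all been delivered, while
 * CONNECT_RESP merely wakes up the message transaction waiting in
 * vmbus_connect().  The rest of the channel message processing is done
 * by vmbus_chan_msgproc(); see vmbus_chanmsg_handle().
 */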
139 static const vmbus_chanmsg_proc_t
140 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
141 	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
142 	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
143 };
144 
145 static device_method_t vmbus_methods[] = {
146 	/* Device interface */
147 	DEVMETHOD(device_probe,			vmbus_probe),
148 	DEVMETHOD(device_attach,		vmbus_attach),
149 	DEVMETHOD(device_detach,		vmbus_detach),
150 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
151 	DEVMETHOD(device_suspend,		bus_generic_suspend),
152 	DEVMETHOD(device_resume,		bus_generic_resume),
153 
154 	/* Bus interface */
155 	DEVMETHOD(bus_add_child,		bus_generic_add_child),
156 	DEVMETHOD(bus_print_child,		bus_generic_print_child),
157 	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
158 	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
159 	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
160 	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
161 	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
162 	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
163 	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
164 	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
165 #if __FreeBSD_version >= 1100000
166 	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
167 #endif
168 
169 	/* pcib interface */
170 	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
171 	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
172 	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
173 	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
174 	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
175 
176 	/* Vmbus interface */
177 	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
178 	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
179 	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
180 	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),
181 
182 	DEVMETHOD_END
183 };
184 
185 static driver_t vmbus_driver = {
186 	"vmbus",
187 	vmbus_methods,
188 	sizeof(struct vmbus_softc)
189 };
190 
191 static devclass_t vmbus_devclass;
192 
193 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
194 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
195 MODULE_DEPEND(vmbus, pci, 1, 1, 1);
196 MODULE_VERSION(vmbus, 1);
197 
198 static __inline struct vmbus_softc *
199 vmbus_get_softc(void)
200 {
201 	return vmbus_sc;
202 }
203 
204 void
205 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
206 {
207 	struct hypercall_postmsg_in *inprm;
208 
209 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
210 		panic("invalid data size %zu", dsize);
211 
212 	inprm = vmbus_xact_req_data(mh->mh_xact);
213 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
214 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
215 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
216 	inprm->hc_dsize = dsize;
217 }
218 
219 struct vmbus_msghc *
220 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
221 {
222 	struct vmbus_msghc *mh;
223 	struct vmbus_xact *xact;
224 
225 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
226 		panic("invalid data size %zu", dsize);
227 
228 	xact = vmbus_xact_get(sc->vmbus_xc,
229 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
230 	if (xact == NULL)
231 		return (NULL);
232 
233 	mh = vmbus_xact_priv(xact, sizeof(*mh));
234 	mh->mh_xact = xact;
235 
236 	vmbus_msghc_reset(mh, dsize);
237 	return (mh);
238 }
239 
240 void
241 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
242 {
243 
244 	vmbus_xact_put(mh->mh_xact);
245 }
246 
247 void *
248 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
249 {
250 	struct hypercall_postmsg_in *inprm;
251 
252 	inprm = vmbus_xact_req_data(mh->mh_xact);
253 	return (inprm->hc_data);
254 }
255 
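/*
 * Post the message to the hypervisor, retrying on transient failures.
 * Callers that expect no response message (e.g. DISCONNECT and CHREQUEST)
 * use this directly; the others go through vmbus_msghc_exec().
 */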
256 int
257 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
258 {
259 	sbintime_t time = SBT_1MS;
260 	struct hypercall_postmsg_in *inprm;
261 	bus_addr_t inprm_paddr;
262 	int i;
263 
264 	inprm = vmbus_xact_req_data(mh->mh_xact);
265 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
266 
267 	/*
268 	 * Save the input parameter so that it can be restored if the
269 	 * Hypercall fails.
270 	 *
271 	 * XXX
272 	 * Is this really necessary?  i.e. will the Hypercall ever
273 	 * overwrite the input parameter?
274 	 */
275 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
276 
277 	/*
278 	 * In order to cope with transient failures, e.g. insufficient
279 	 * resources on the host side, we retry the post message Hypercall
280 	 * several times.  20 retries seem sufficient.
281 	 */
282 #define HC_RETRY_MAX	20
283 
284 	for (i = 0; i < HC_RETRY_MAX; ++i) {
285 		uint64_t status;
286 
287 		status = hypercall_post_message(inprm_paddr);
288 		if (status == HYPERCALL_STATUS_SUCCESS)
289 			return 0;
290 
291 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
292 		if (time < SBT_1S * 2)
293 			time *= 2;
294 
295 		/* Restore input parameter and try again */
296 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
297 	}
298 
299 #undef HC_RETRY_MAX
300 
301 	return EIO;
302 }
303 
304 int
305 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
306 {
307 	int error;
308 
309 	vmbus_xact_activate(mh->mh_xact);
310 	error = vmbus_msghc_exec_noresult(mh);
311 	if (error)
312 		vmbus_xact_deactivate(mh->mh_xact);
313 	return error;
314 }
315 
316 void
317 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
318 {
319 
320 	vmbus_xact_deactivate(mh->mh_xact);
321 }
322 
323 const struct vmbus_message *
324 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
325 {
326 	size_t resp_len;
327 
328 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
329 }
330 
331 const struct vmbus_message *
332 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
333 {
334 	size_t resp_len;
335 
336 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
337 }
338 
339 void
340 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
341 {
342 
343 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
344 }
345 
346 uint32_t
347 vmbus_gpadl_alloc(struct vmbus_softc *sc)
348 {
349 	uint32_t gpadl;
350 
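	/* GPADL handle 0 is treated as invalid; skip it if the counter wraps. */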
351 again:
352 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
353 	if (gpadl == 0)
354 		goto again;
355 	return (gpadl);
356 }
357 
358 static int
359 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
360 {
361 	struct vmbus_chanmsg_connect *req;
362 	const struct vmbus_message *msg;
363 	struct vmbus_msghc *mh;
364 	int error, done = 0;
365 
366 	mh = vmbus_msghc_get(sc, sizeof(*req));
367 	if (mh == NULL)
368 		return ENXIO;
369 
370 	req = vmbus_msghc_dataptr(mh);
371 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
372 	req->chm_ver = version;
373 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
374 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
375 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
376 
377 	error = vmbus_msghc_exec(sc, mh);
378 	if (error) {
379 		vmbus_msghc_put(sc, mh);
380 		return error;
381 	}
382 
383 	msg = vmbus_msghc_wait_result(sc, mh);
384 	done = ((const struct vmbus_chanmsg_connect_resp *)
385 	    msg->msg_data)->chm_done;
386 
387 	vmbus_msghc_put(sc, mh);
388 
389 	return (done ? 0 : EOPNOTSUPP);
390 }
391 
392 static int
393 vmbus_init(struct vmbus_softc *sc)
394 {
395 	int i;
396 
397 	for (i = 0; i < nitems(vmbus_version); ++i) {
398 		int error;
399 
400 		error = vmbus_connect(sc, vmbus_version[i]);
401 		if (!error) {
402 			sc->vmbus_version = vmbus_version[i];
403 			device_printf(sc->vmbus_dev, "version %u.%u\n",
404 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
405 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
406 			return 0;
407 		}
408 	}
409 	return ENXIO;
410 }
411 
412 static void
413 vmbus_disconnect(struct vmbus_softc *sc)
414 {
415 	struct vmbus_chanmsg_disconnect *req;
416 	struct vmbus_msghc *mh;
417 	int error;
418 
419 	mh = vmbus_msghc_get(sc, sizeof(*req));
420 	if (mh == NULL) {
421 		device_printf(sc->vmbus_dev,
422 		    "can not get msg hypercall for disconnect\n");
423 		return;
424 	}
425 
426 	req = vmbus_msghc_dataptr(mh);
427 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
428 
429 	error = vmbus_msghc_exec_noresult(mh);
430 	vmbus_msghc_put(sc, mh);
431 
432 	if (error) {
433 		device_printf(sc->vmbus_dev,
434 		    "disconnect msg hypercall failed\n");
435 	}
436 }
437 
438 static int
439 vmbus_req_channels(struct vmbus_softc *sc)
440 {
441 	struct vmbus_chanmsg_chrequest *req;
442 	struct vmbus_msghc *mh;
443 	int error;
444 
445 	mh = vmbus_msghc_get(sc, sizeof(*req));
446 	if (mh == NULL)
447 		return ENXIO;
448 
449 	req = vmbus_msghc_dataptr(mh);
450 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
451 
452 	error = vmbus_msghc_exec_noresult(mh);
453 	vmbus_msghc_put(sc, mh);
454 
455 	return error;
456 }
457 
458 static void
459 vmbus_scan_done_task(void *xsc, int pending __unused)
460 {
461 	struct vmbus_softc *sc = xsc;
462 
463 	mtx_lock(&Giant);
464 	sc->vmbus_scandone = true;
465 	mtx_unlock(&Giant);
466 	wakeup(&sc->vmbus_scandone);
467 }
468 
469 static void
470 vmbus_scan_done(struct vmbus_softc *sc,
471     const struct vmbus_message *msg __unused)
472 {
473 
474 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
475 }
476 
477 static int
478 vmbus_scan(struct vmbus_softc *sc)
479 {
480 	int error;
481 
482 	/*
483 	 * Identify, probe and attach the non-channel child devices.
484 	 */
485 	bus_generic_probe(sc->vmbus_dev);
486 	bus_generic_attach(sc->vmbus_dev);
487 
488 	/*
489 	 * This taskqueue serializes the attach and detach of vmbus
490 	 * devices, driven by channel offer and rescind messages.
491 	 */
492 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
493 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
494 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
495 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
496 
497 	/*
498 	 * This taskqueue handles sub-channel detach, so that a vmbus
499 	 * device's detach, running on vmbus_devtq, can drain its
500 	 * sub-channels.
501 	 */
502 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
503 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
504 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
505 
506 	/*
507 	 * Start vmbus scanning.
508 	 */
509 	error = vmbus_req_channels(sc);
510 	if (error) {
511 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
512 		    error);
513 		return (error);
514 	}
515 
516 	/*
517 	 * Wait for all vmbus devices from the initial channel offers to be
518 	 * attached.
519 	 */
520 	GIANT_REQUIRED;
521 	while (!sc->vmbus_scandone)
522 		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
523 
524 	if (bootverbose) {
525 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
526 		    "done\n");
527 	}
528 	return (0);
529 }
530 
531 static void
532 vmbus_scan_teardown(struct vmbus_softc *sc)
533 {
534 
535 	GIANT_REQUIRED;
536 	if (sc->vmbus_devtq != NULL) {
537 		mtx_unlock(&Giant);
538 		taskqueue_free(sc->vmbus_devtq);
539 		mtx_lock(&Giant);
540 		sc->vmbus_devtq = NULL;
541 	}
542 	if (sc->vmbus_subchtq != NULL) {
543 		mtx_unlock(&Giant);
544 		taskqueue_free(sc->vmbus_subchtq);
545 		mtx_lock(&Giant);
546 		sc->vmbus_subchtq = NULL;
547 	}
548 }
549 
550 static void
551 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
552 {
553 	vmbus_chanmsg_proc_t msg_proc;
554 	uint32_t msg_type;
555 
556 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
557 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
558 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
559 		    msg_type);
560 		return;
561 	}
562 
563 	msg_proc = vmbus_chanmsg_handlers[msg_type];
564 	if (msg_proc != NULL)
565 		msg_proc(sc, msg);
566 
567 	/* Channel specific processing */
568 	vmbus_chan_msgproc(sc, msg);
569 }
570 
571 static void
572 vmbus_msg_task(void *xsc, int pending __unused)
573 {
574 	struct vmbus_softc *sc = xsc;
575 	volatile struct vmbus_message *msg;
576 
577 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
578 	for (;;) {
579 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
580 			/* No message */
581 			break;
582 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
583 			/* Channel message */
584 			vmbus_chanmsg_handle(sc,
585 			    __DEVOLATILE(const struct vmbus_message *, msg));
586 		}
587 
588 		msg->msg_type = HYPERV_MSGTYPE_NONE;
589 		/*
590 		 * Make sure the write to msg_type (i.e. setting it to
591 		 * HYPERV_MSGTYPE_NONE) happens before we read msg_flags
592 		 * and write the EOM MSR.  Otherwise, the EOM will not
593 		 * cause any more messages to be delivered, since the
594 		 * hypervisor sees no empty slot.
595 		 *
596 		 * NOTE:
597 		 * mb() is used here, since atomic_thread_fence_seq_cst()
598 		 * would be only a compiler fence on UP kernels.
599 		 */
600 		mb();
601 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
602 			/*
603 			 * This will cause the message queue to be rescanned,
604 			 * possibly delivering another message from the hypervisor.
605 			 */
606 			wrmsr(MSR_HV_EOM, 0);
607 		}
608 	}
609 }
610 
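/*
 * Per-cpu interrupt handler body: drain the timer message, process the
 * pending channel events, and defer channel messages to this cpu's
 * message taskqueue.
 */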
611 static __inline int
612 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
613 {
614 	volatile struct vmbus_message *msg;
615 	struct vmbus_message *msg_base;
616 
617 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
618 
619 	/*
620 	 * Check event timer.
621 	 *
622 	 * TODO: move this to independent IDT vector.
623 	 */
624 	msg = msg_base + VMBUS_SINT_TIMER;
625 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
626 		msg->msg_type = HYPERV_MSGTYPE_NONE;
627 
628 		vmbus_et_intr(frame);
629 
630 		/*
631 		 * Make sure the write to msg_type (i.e. setting it to
632 		 * HYPERV_MSGTYPE_NONE) happens before we read msg_flags
633 		 * and write the EOM MSR.  Otherwise, the EOM will not
634 		 * cause any more messages to be delivered, since the
635 		 * hypervisor sees no empty slot.
636 		 *
637 		 * NOTE:
638 		 * mb() is used here, since atomic_thread_fence_seq_cst()
639 		 * would be only a compiler fence on UP kernels.
640 		 */
641 		mb();
642 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
643 			/*
644 			 * This will cause the message queue to be rescanned,
645 			 * possibly delivering another message from the hypervisor.
646 			 */
647 			wrmsr(MSR_HV_EOM, 0);
648 		}
649 	}
650 
651 	/*
652 	 * Check events.  Hot path for network and storage I/O data; high rate.
653 	 *
654 	 * NOTE:
655 	 * As recommended by the Windows guest fellows, we check events before
656 	 * checking messages.
657 	 */
658 	sc->vmbus_event_proc(sc, cpu);
659 
660 	/*
661 	 * Check messages.  Mainly management traffic; ultra-low rate.
662 	 */
663 	msg = msg_base + VMBUS_SINT_MESSAGE;
664 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
665 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
666 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
667 	}
668 
669 	return (FILTER_HANDLED);
670 }
671 
672 void
673 vmbus_handle_intr(struct trapframe *trap_frame)
674 {
675 	struct vmbus_softc *sc = vmbus_get_softc();
676 	int cpu = curcpu;
677 
678 	/*
679 	 * Disable preemption.
680 	 */
681 	critical_enter();
682 
683 	/*
684 	 * Do a little interrupt counting.
685 	 */
686 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
687 
688 	vmbus_handle_intr1(sc, trap_frame, cpu);
689 
690 	/*
691 	 * Enable preemption.
692 	 */
693 	critical_exit();
694 }
695 
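/*
 * Program this cpu's SynIC: record the VP index, set up the message and
 * event flags pages, unmask the message and timer SINTs, and finally
 * enable the SynIC.  Runs on each cpu through smp_rendezvous() during
 * attach.
 */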
696 static void
697 vmbus_synic_setup(void *xsc)
698 {
699 	struct vmbus_softc *sc = xsc;
700 	int cpu = curcpu;
701 	uint64_t val, orig;
702 	uint32_t sint;
703 
704 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
705 		/* Save virtual processor id. */
706 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
707 	} else {
708 		/* Set virtual processor id to 0 for compatibility. */
709 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
710 	}
711 
712 	/*
713 	 * Set up the SynIC message page (SIMP).
714 	 */
715 	orig = rdmsr(MSR_HV_SIMP);
716 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
717 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
718 	     MSR_HV_SIMP_PGSHIFT);
719 	wrmsr(MSR_HV_SIMP, val);
720 
721 	/*
722 	 * Set up the SynIC event flags page (SIEFP).
723 	 */
724 	orig = rdmsr(MSR_HV_SIEFP);
725 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
726 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
727 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
728 	wrmsr(MSR_HV_SIEFP, val);
729 
730 
731 	/*
732 	 * Configure and unmask SINT for message and event flags.
733 	 */
734 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
735 	orig = rdmsr(sint);
736 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
737 	    (orig & MSR_HV_SINT_RSVD_MASK);
738 	wrmsr(sint, val);
739 
740 	/*
741 	 * Configure and unmask SINT for timer.
742 	 */
743 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
744 	orig = rdmsr(sint);
745 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
746 	    (orig & MSR_HV_SINT_RSVD_MASK);
747 	wrmsr(sint, val);
748 
749 	/*
750 	 * All done; enable SynIC.
751 	 */
752 	orig = rdmsr(MSR_HV_SCONTROL);
753 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
754 	wrmsr(MSR_HV_SCONTROL, val);
755 }
756 
757 static void
758 vmbus_synic_teardown(void *arg)
759 {
760 	uint64_t orig;
761 	uint32_t sint;
762 
763 	/*
764 	 * Disable SynIC.
765 	 */
766 	orig = rdmsr(MSR_HV_SCONTROL);
767 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
768 
769 	/*
770 	 * Mask message and event flags SINT.
771 	 */
772 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
773 	orig = rdmsr(sint);
774 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
775 
776 	/*
777 	 * Mask timer SINT.
778 	 */
779 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
780 	orig = rdmsr(sint);
781 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
782 
783 	/*
784 	 * Tear down the SynIC message page.
785 	 */
786 	orig = rdmsr(MSR_HV_SIMP);
787 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
788 
789 	/*
790 	 * Tear down the SynIC event flags page.
791 	 */
792 	orig = rdmsr(MSR_HV_SIEFP);
793 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
794 }
795 
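/*
 * Allocate the per-cpu SynIC message and event flags pages, the
 * vmbus-wide event flags page (split into rx/tx halves), and the two
 * MNF pages that are handed to the hypervisor in the CONNECT request.
 */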
796 static int
797 vmbus_dma_alloc(struct vmbus_softc *sc)
798 {
799 	bus_dma_tag_t parent_dtag;
800 	uint8_t *evtflags;
801 	int cpu;
802 
803 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
804 	CPU_FOREACH(cpu) {
805 		void *ptr;
806 
807 		/*
808 		 * Per-cpu messages and event flags.
809 		 */
810 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
811 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
812 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
813 		if (ptr == NULL)
814 			return ENOMEM;
815 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
816 
817 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
818 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
819 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
820 		if (ptr == NULL)
821 			return ENOMEM;
822 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
823 	}
824 
825 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
826 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
827 	if (evtflags == NULL)
828 		return ENOMEM;
829 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
830 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
831 	sc->vmbus_evtflags = evtflags;
832 
833 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
834 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
835 	if (sc->vmbus_mnf1 == NULL)
836 		return ENOMEM;
837 
838 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
839 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
840 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
841 	if (sc->vmbus_mnf2 == NULL)
842 		return ENOMEM;
843 
844 	return 0;
845 }
846 
847 static void
848 vmbus_dma_free(struct vmbus_softc *sc)
849 {
850 	int cpu;
851 
852 	if (sc->vmbus_evtflags != NULL) {
853 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
854 		sc->vmbus_evtflags = NULL;
855 		sc->vmbus_rx_evtflags = NULL;
856 		sc->vmbus_tx_evtflags = NULL;
857 	}
858 	if (sc->vmbus_mnf1 != NULL) {
859 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
860 		sc->vmbus_mnf1 = NULL;
861 	}
862 	if (sc->vmbus_mnf2 != NULL) {
863 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
864 		sc->vmbus_mnf2 = NULL;
865 	}
866 
867 	CPU_FOREACH(cpu) {
868 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
869 			hyperv_dmamem_free(
870 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
871 			    VMBUS_PCPU_GET(sc, message, cpu));
872 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
873 		}
874 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
875 			hyperv_dmamem_free(
876 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
877 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
878 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
879 		}
880 	}
881 }
882 
883 static int
884 vmbus_intr_setup(struct vmbus_softc *sc)
885 {
886 	int cpu;
887 
888 	CPU_FOREACH(cpu) {
889 		char buf[MAXCOMLEN + 1];
890 		cpuset_t cpu_mask;
891 
892 		/* Allocate an interrupt counter for Hyper-V interrupt */
893 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
894 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
895 
896 		/*
897 		 * Set up a taskqueue to handle events.  The tasks
898 		 * themselves are per-channel.
899 		 */
900 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
901 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
902 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
903 		CPU_SETOF(cpu, &cpu_mask);
904 		taskqueue_start_threads_cpuset(
905 		    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask,
906 		    "hvevent%d", cpu);
907 
908 		/*
909 		 * Set up the per-cpu task and taskqueue to handle messages.
910 		 */
911 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
912 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
913 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
914 		CPU_SETOF(cpu, &cpu_mask);
915 		taskqueue_start_threads_cpuset(
916 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
917 		    "hvmsg%d", cpu);
918 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
919 		    vmbus_msg_task, sc);
920 	}
921 
922 	/*
923 	 * All resources required by the Hyper-V ISR are now set up;
924 	 * find a free IDT vector for the Hyper-V ISR and install it.
925 	 */
926 	sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
927 	if (sc->vmbus_idtvec < 0) {
928 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
929 		return ENXIO;
930 	}
931 	if (bootverbose) {
932 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
933 		    sc->vmbus_idtvec);
934 	}
935 	return 0;
936 }
937 
938 static void
939 vmbus_intr_teardown(struct vmbus_softc *sc)
940 {
941 	int cpu;
942 
943 	if (sc->vmbus_idtvec >= 0) {
944 		lapic_ipi_free(sc->vmbus_idtvec);
945 		sc->vmbus_idtvec = -1;
946 	}
947 
948 	CPU_FOREACH(cpu) {
949 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
950 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
951 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
952 		}
953 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
954 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
955 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
956 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
957 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
958 		}
959 	}
960 }
961 
962 static int
963 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
964 {
965 	return (ENOENT);
966 }
967 
968 static int
969 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
970 {
971 	const struct vmbus_channel *chan;
972 	char guidbuf[HYPERV_GUID_STRLEN];
973 
974 	chan = vmbus_get_channel(child);
975 	if (chan == NULL) {
976 		/* Event timer device, which does not belong to a channel */
977 		return (0);
978 	}
979 
980 	strlcat(buf, "classid=", buflen);
981 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
982 	strlcat(buf, guidbuf, buflen);
983 
984 	strlcat(buf, " deviceid=", buflen);
985 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
986 	strlcat(buf, guidbuf, buflen);
987 
988 	return (0);
989 }
990 
991 int
992 vmbus_add_child(struct vmbus_channel *chan)
993 {
994 	struct vmbus_softc *sc = chan->ch_vmbus;
995 	device_t parent = sc->vmbus_dev;
996 
997 	mtx_lock(&Giant);
998 
999 	chan->ch_dev = device_add_child(parent, NULL, -1);
1000 	if (chan->ch_dev == NULL) {
1001 		mtx_unlock(&Giant);
1002 		device_printf(parent, "device_add_child for chan%u failed\n",
1003 		    chan->ch_id);
1004 		return (ENXIO);
1005 	}
1006 	device_set_ivars(chan->ch_dev, chan);
1007 	device_probe_and_attach(chan->ch_dev);
1008 
1009 	mtx_unlock(&Giant);
1010 	return (0);
1011 }
1012 
1013 int
1014 vmbus_delete_child(struct vmbus_channel *chan)
1015 {
1016 	int error = 0;
1017 
1018 	mtx_lock(&Giant);
1019 	if (chan->ch_dev != NULL) {
1020 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1021 		    chan->ch_dev);
1022 		chan->ch_dev = NULL;
1023 	}
1024 	mtx_unlock(&Giant);
1025 	return (error);
1026 }
1027 
1028 static int
1029 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1030 {
1031 	struct vmbus_softc *sc = arg1;
1032 	char verstr[16];
1033 
1034 	snprintf(verstr, sizeof(verstr), "%u.%u",
1035 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1036 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1037 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1038 }
1039 
1040 /*
1041  * We need this function to make sure that MMIO resources are allocated
1042  * from the ranges found in _CRS.
1043  *
1044  * For the release method, bus_generic_release_resource() can be used.
1045  */
1046 static struct resource *
1047 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1048     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1049 {
1050 	device_t parent = device_get_parent(dev);
1051 	struct resource *res;
1052 
1053 #ifdef NEW_PCIB
1054 	if (type == SYS_RES_MEMORY) {
1055 		struct vmbus_softc *sc = device_get_softc(dev);
1056 
1057 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1058 		    rid, start, end, count, flags);
1059 	} else
1060 #endif
1061 	{
1062 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1063 		    end, count, flags);
1064 	}
1065 
1066 	return (res);
1067 }
1068 
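/*
 * MSI/MSI-X (pcib) requests are simply forwarded to the nexus, i.e. the
 * grandparent of vmbus (vmbus -> acpi -> nexus).
 */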
1069 static device_t
1070 get_nexus(device_t vmbus)
1071 {
1072 	device_t acpi = device_get_parent(vmbus);
1073 	device_t nexus = device_get_parent(acpi);
1074 	return (nexus);
1075 }
1076 
1077 static int
1078 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1079 {
1080 	return (PCIB_ALLOC_MSI(get_nexus(bus), dev, count, maxcount, irqs));
1081 }
1082 
1083 static int
1084 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1085 {
1086 	return (PCIB_RELEASE_MSI(get_nexus(bus), dev, count, irqs));
1087 }
1088 
1089 static int
1090 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1091 {
1092 	return (PCIB_ALLOC_MSIX(get_nexus(bus), dev, irq));
1093 }
1094 
1095 static int
1096 vmbus_release_msix(device_t bus, device_t dev, int irq)
1097 {
1098 	return (PCIB_RELEASE_MSIX(get_nexus(bus), dev, irq));
1099 }
1100 
1101 static int
1102 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1103 	uint32_t *data)
1104 {
1105 	return (PCIB_MAP_MSI(get_nexus(bus), dev, irq, addr, data));
1106 }
1107 
1108 static uint32_t
1109 vmbus_get_version_method(device_t bus, device_t dev)
1110 {
1111 	struct vmbus_softc *sc = device_get_softc(bus);
1112 
1113 	return sc->vmbus_version;
1114 }
1115 
1116 static int
1117 vmbus_probe_guid_method(device_t bus, device_t dev,
1118     const struct hyperv_guid *guid)
1119 {
1120 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1121 
1122 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1123 		return 0;
1124 	return ENXIO;
1125 }
1126 
1127 static uint32_t
1128 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1129 {
1130 	const struct vmbus_softc *sc = device_get_softc(bus);
1131 
1132 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1133 }
1134 
1135 static struct taskqueue *
1136 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1137 {
1138 	const struct vmbus_softc *sc = device_get_softc(bus);
1139 
1140 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1141 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1142 }
1143 
1144 #ifdef NEW_PCIB
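/*
 * Base address of the virtual TPM; MMIO ranges handed out to children
 * must not overlap it, see parse_crs().
 */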
1145 #define VTPM_BASE_ADDR 0xfed40000
1146 #define FOUR_GB (1ULL << 32)
1147 
1148 enum parse_pass { parse_64, parse_32 };
1149 
1150 struct parse_context {
1151 	device_t vmbus_dev;
1152 	enum parse_pass pass;
1153 };
1154 
1155 static ACPI_STATUS
1156 parse_crs(ACPI_RESOURCE *res, void *ctx)
1157 {
1158 	const struct parse_context *pc = ctx;
1159 	device_t vmbus_dev = pc->vmbus_dev;
1160 
1161 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1162 	UINT64 start, end;
1163 
1164 	switch (res->Type) {
1165 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1166 		start = res->Data.Address32.Address.Minimum;
1167 		end = res->Data.Address32.Address.Maximum;
1168 		break;
1169 
1170 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1171 		start = res->Data.Address64.Address.Minimum;
1172 		end = res->Data.Address64.Address.Maximum;
1173 		break;
1174 
1175 	default:
1176 		/* Unused types. */
1177 		return (AE_OK);
1178 	}
1179 
1180 	/*
1181 	 * We don't use <1MB addresses.
1182 	 */
1183 	if (end < 0x100000)
1184 		return (AE_OK);
1185 
1186 	/* Don't conflict with vTPM. */
1187 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1188 		end = VTPM_BASE_ADDR - 1;
1189 
1190 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1191 	    (pc->pass == parse_64 && start >= FOUR_GB))
1192 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1193 		    start, end, 0);
1194 
1195 	return (AE_OK);
1196 }
1197 
1198 static void
1199 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1200 {
1201 	struct parse_context pc;
1202 	ACPI_STATUS status;
1203 
1204 	if (bootverbose)
1205 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1206 
1207 	pc.vmbus_dev = vmbus_dev;
1208 	pc.pass = pass;
1209 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1210 			parse_crs, &pc);
1211 
1212 	if (bootverbose && ACPI_FAILURE(status))
1213 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1214 }
1215 
1216 static void
1217 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1218 {
1219 	device_t acpi0, pcib0 = NULL;
1220 	device_t *children;
1221 	int i, count;
1222 
1223 	/* Try to find _CRS on VMBus device */
1224 	vmbus_get_crs(dev, dev, pass);
1225 
1226 	/* Try to find _CRS on VMBus device's parent */
1227 	acpi0 = device_get_parent(dev);
1228 	vmbus_get_crs(acpi0, dev, pass);
1229 
1230 	/* Try to locate pcib0 and find _CRS on it */
1231 	if (device_get_children(acpi0, &children, &count) != 0)
1232 		return;
1233 
1234 	for (i = 0; i < count; i++) {
1235 		if (!device_is_attached(children[i]))
1236 			continue;
1237 
1238 		if (strcmp("pcib0", device_get_nameunit(children[i])))
1239 			continue;
1240 
1241 		pcib0 = children[i];
1242 		break;
1243 	}
1244 
1245 	if (pcib0)
1246 		vmbus_get_crs(pcib0, dev, pass);
1247 
1248 	free(children, M_TEMP);
1249 }
1250 
1251 static void
1252 vmbus_get_mmio_res(device_t dev)
1253 {
1254 	struct vmbus_softc *sc = device_get_softc(dev);
1255 	/*
1256 	 * We walk the resources twice to make sure that, in the resource
1257 	 * list, the 32-bit resources appear after the 64-bit resources.
1258 	 * NB: resource_list_add() uses INSERT_TAIL.  This way, when we
1259 	 * iterate through the list to find a range for a 64-bit BAR in
1260 	 * vmbus_alloc_resource(), we make sure that the >4GB ranges are
1261 	 * tried first.
1262 	 */
1263 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1264 
1265 	vmbus_get_mmio_res_pass(dev, parse_64);
1266 	vmbus_get_mmio_res_pass(dev, parse_32);
1267 }
1268 
1269 static void
1270 vmbus_free_mmio_res(device_t dev)
1271 {
1272 	struct vmbus_softc *sc = device_get_softc(dev);
1273 
1274 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1275 }
1276 #endif	/* NEW_PCIB */
1277 
1278 static int
1279 vmbus_probe(device_t dev)
1280 {
1281 	char *id[] = { "VMBUS", NULL };
1282 
1283 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
1284 	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1285 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1286 		return (ENXIO);
1287 
1288 	device_set_desc(dev, "Hyper-V Vmbus");
1289 
1290 	return (BUS_PROBE_DEFAULT);
1291 }
1292 
1293 /**
1294  * @brief Main vmbus driver initialization routine.
1295  *
1296  * Here, we
1297  * - initialize the channel lists and the GPADL handle counter
1298  * - create the "post message" hypercall transaction context
1299  * - allocate the per-cpu SynIC message and event flags pages
1300  * - set up the interrupt handling and the SynIC on each cpu
1301  * - negotiate the vmbus protocol version with the hypervisor
1302  * - request the channel offers and attach the offered devices
1303  * - register the "version" sysctl
1304  */
1305 static int
1306 vmbus_doattach(struct vmbus_softc *sc)
1307 {
1308 	struct sysctl_oid_list *child;
1309 	struct sysctl_ctx_list *ctx;
1310 	int ret;
1311 
1312 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1313 		return (0);
1314 
1315 #ifdef NEW_PCIB
1316 	vmbus_get_mmio_res(sc->vmbus_dev);
1317 #endif
1318 
1319 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1320 
1321 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1322 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1323 	TAILQ_INIT(&sc->vmbus_prichans);
1324 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1325 	TAILQ_INIT(&sc->vmbus_chans);
1326 	sc->vmbus_chmap = malloc(
1327 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1328 	    M_WAITOK | M_ZERO);
1329 
1330 	/*
1331 	 * Create context for "post message" Hypercalls
1332 	 */
1333 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1334 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1335 	    sizeof(struct vmbus_msghc));
1336 	if (sc->vmbus_xc == NULL) {
1337 		ret = ENXIO;
1338 		goto cleanup;
1339 	}
1340 
1341 	/*
1342 	 * Allocate the DMA resources.
1343 	 */
1344 	ret = vmbus_dma_alloc(sc);
1345 	if (ret != 0)
1346 		goto cleanup;
1347 
1348 	/*
1349 	 * Set up the interrupt handling.
1350 	 */
1351 	ret = vmbus_intr_setup(sc);
1352 	if (ret != 0)
1353 		goto cleanup;
1354 
1355 	/*
1356 	 * Set up the SynIC.
1357 	 */
1358 	if (bootverbose)
1359 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1360 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1361 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1362 
1363 	/*
1364 	 * Initialize vmbus, e.g. connect to Hypervisor.
1365 	 */
1366 	ret = vmbus_init(sc);
1367 	if (ret != 0)
1368 		goto cleanup;
1369 
1370 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1371 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1372 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1373 	else
1374 		sc->vmbus_event_proc = vmbus_event_proc;
1375 
1376 	ret = vmbus_scan(sc);
1377 	if (ret != 0)
1378 		goto cleanup;
1379 
1380 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1381 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1382 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1383 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1384 	    vmbus_sysctl_version, "A", "vmbus version");
1385 
1386 	return (ret);
1387 
1388 cleanup:
1389 	vmbus_scan_teardown(sc);
1390 	vmbus_intr_teardown(sc);
1391 	vmbus_dma_free(sc);
1392 	if (sc->vmbus_xc != NULL) {
1393 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1394 		sc->vmbus_xc = NULL;
1395 	}
1396 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1397 	mtx_destroy(&sc->vmbus_prichan_lock);
1398 	mtx_destroy(&sc->vmbus_chan_lock);
1399 
1400 	return (ret);
1401 }
1402 
1403 static void
1404 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1405 {
1406 }
1407 
1408 #ifdef EARLY_AP_STARTUP
1409 
1410 static void
1411 vmbus_intrhook(void *xsc)
1412 {
1413 	struct vmbus_softc *sc = xsc;
1414 
1415 	if (bootverbose)
1416 		device_printf(sc->vmbus_dev, "intrhook\n");
1417 	vmbus_doattach(sc);
1418 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1419 }
1420 
1421 #endif	/* EARLY_AP_STARTUP */
1422 
1423 static int
1424 vmbus_attach(device_t dev)
1425 {
1426 	vmbus_sc = device_get_softc(dev);
1427 	vmbus_sc->vmbus_dev = dev;
1428 	vmbus_sc->vmbus_idtvec = -1;
1429 
1430 	/*
1431 	 * Event processing logic will be configured:
1432 	 * - After the vmbus protocol version negotiation.
1433 	 * - Before we request channel offers.
1434 	 */
1435 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1436 
1437 #ifdef EARLY_AP_STARTUP
1438 	/*
1439 	 * Defer the real attach until pause(9) works as expected.
1440 	 */
1441 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1442 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1443 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1444 #else	/* !EARLY_AP_STARTUP */
1445 	/*
1446 	 * If the system has already booted and thread
1447 	 * scheduling is possible, as indicated by the
1448 	 * global cold set to zero, we just call the
1449 	 * driver initialization directly.
1450 	 */
1451 	if (!cold)
1452 		vmbus_doattach(vmbus_sc);
1453 #endif	/* EARLY_AP_STARTUP */
1454 
1455 	return (0);
1456 }
1457 
1458 static int
1459 vmbus_detach(device_t dev)
1460 {
1461 	struct vmbus_softc *sc = device_get_softc(dev);
1462 
1463 	bus_generic_detach(dev);
1464 	vmbus_chan_destroy_all(sc);
1465 
1466 	vmbus_scan_teardown(sc);
1467 
1468 	vmbus_disconnect(sc);
1469 
1470 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1471 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1472 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1473 	}
1474 
1475 	vmbus_intr_teardown(sc);
1476 	vmbus_dma_free(sc);
1477 
1478 	if (sc->vmbus_xc != NULL) {
1479 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1480 		sc->vmbus_xc = NULL;
1481 	}
1482 
1483 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1484 	mtx_destroy(&sc->vmbus_prichan_lock);
1485 	mtx_destroy(&sc->vmbus_chan_lock);
1486 
1487 #ifdef NEW_PCIB
1488 	vmbus_free_mmio_res(dev);
1489 #endif
1490 
1491 	return (0);
1492 }
1493 
1494 #ifndef EARLY_AP_STARTUP
1495 
1496 static void
1497 vmbus_sysinit(void *arg __unused)
1498 {
1499 	struct vmbus_softc *sc = vmbus_get_softc();
1500 
1501 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1502 		return;
1503 
1504 	/*
1505 	 * If the system has already booted and thread
1506 	 * scheduling is possible, as indicated by the
1507 	 * global cold set to zero, we just call the driver
1508 	 * initialization directly.
1509 	 */
1510 	if (!cold)
1511 		vmbus_doattach(sc);
1512 }
1513 /*
1514  * NOTE:
1515  * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1516  * initialized.
1517  */
1518 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1519 
1520 #endif	/* !EARLY_AP_STARTUP */
1521