xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision 8f6355b51dec56dfc33f22e338f0614785e739de)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/linker.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/smp.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/taskqueue.h>
47 
48 #include <machine/bus.h>
49 #include <machine/intr_machdep.h>
50 #include <machine/metadata.h>
51 #include <machine/md_var.h>
52 #include <machine/resource.h>
53 #include <x86/include/apicvar.h>
54 
55 #include <contrib/dev/acpica/include/acpi.h>
56 #include <dev/acpica/acpivar.h>
57 
58 #include <dev/hyperv/include/hyperv.h>
59 #include <dev/hyperv/include/vmbus_xact.h>
60 #include <dev/hyperv/vmbus/hyperv_reg.h>
61 #include <dev/hyperv/vmbus/hyperv_var.h>
62 #include <dev/hyperv/vmbus/vmbus_reg.h>
63 #include <dev/hyperv/vmbus/vmbus_var.h>
64 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
65 
66 #include "acpi_if.h"
67 #include "pcib_if.h"
68 #include "vmbus_if.h"
69 
70 #define VMBUS_GPADL_START		0xe1e10
71 
72 struct vmbus_msghc {
73 	struct vmbus_xact		*mh_xact;
74 	struct hypercall_postmsg_in	mh_inprm_save;
75 };
76 
77 static void			vmbus_identify(driver_t *, device_t);
78 static int			vmbus_probe(device_t);
79 static int			vmbus_attach(device_t);
80 static int			vmbus_detach(device_t);
81 static int			vmbus_read_ivar(device_t, device_t, int,
82 				    uintptr_t *);
83 static int			vmbus_child_pnpinfo_str(device_t, device_t,
84 				    char *, size_t);
85 static struct resource		*vmbus_alloc_resource(device_t dev,
86 				    device_t child, int type, int *rid,
87 				    rman_res_t start, rman_res_t end,
88 				    rman_res_t count, u_int flags);
89 static int			vmbus_alloc_msi(device_t bus, device_t dev,
90 				    int count, int maxcount, int *irqs);
91 static int			vmbus_release_msi(device_t bus, device_t dev,
92 				    int count, int *irqs);
93 static int			vmbus_alloc_msix(device_t bus, device_t dev,
94 				    int *irq);
95 static int			vmbus_release_msix(device_t bus, device_t dev,
96 				    int irq);
97 static int			vmbus_map_msi(device_t bus, device_t dev,
98 				    int irq, uint64_t *addr, uint32_t *data);
99 static uint32_t			vmbus_get_version_method(device_t, device_t);
100 static int			vmbus_probe_guid_method(device_t, device_t,
101 				    const struct hyperv_guid *);
102 static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
103 				    device_t dev, int cpu);
104 static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
105 				    int);
106 #ifdef EARLY_AP_STARTUP
107 static void			vmbus_intrhook(void *);
108 #endif
109 
110 static int			vmbus_init(struct vmbus_softc *);
111 static int			vmbus_connect(struct vmbus_softc *, uint32_t);
112 static int			vmbus_req_channels(struct vmbus_softc *sc);
113 static void			vmbus_disconnect(struct vmbus_softc *);
114 static int			vmbus_scan(struct vmbus_softc *);
115 static void			vmbus_scan_teardown(struct vmbus_softc *);
116 static void			vmbus_scan_done(struct vmbus_softc *,
117 				    const struct vmbus_message *);
118 static void			vmbus_chanmsg_handle(struct vmbus_softc *,
119 				    const struct vmbus_message *);
120 static void			vmbus_msg_task(void *, int);
121 static void			vmbus_synic_setup(void *);
122 static void			vmbus_synic_teardown(void *);
123 static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
124 static int			vmbus_dma_alloc(struct vmbus_softc *);
125 static void			vmbus_dma_free(struct vmbus_softc *);
126 static int			vmbus_intr_setup(struct vmbus_softc *);
127 static void			vmbus_intr_teardown(struct vmbus_softc *);
128 static int			vmbus_doattach(struct vmbus_softc *);
129 static void			vmbus_event_proc_dummy(struct vmbus_softc *,
130 				    int);
131 
132 static struct vmbus_softc	*vmbus_sc;
133 
134 SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
135     "Hyper-V vmbus");
136 
137 static int			vmbus_pin_evttask = 1;
138 SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
139     &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
140 
141 extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);
142 
143 uint32_t			vmbus_current_version;
144 
145 static const uint32_t		vmbus_version[] = {
146 	VMBUS_VERSION_WIN10,
147 	VMBUS_VERSION_WIN8_1,
148 	VMBUS_VERSION_WIN8,
149 	VMBUS_VERSION_WIN7,
150 	VMBUS_VERSION_WS2008
151 };
152 
153 static const vmbus_chanmsg_proc_t
154 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
155 	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
156 	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
157 };
158 
159 static device_method_t vmbus_methods[] = {
160 	/* Device interface */
161 	DEVMETHOD(device_identify,		vmbus_identify),
162 	DEVMETHOD(device_probe,			vmbus_probe),
163 	DEVMETHOD(device_attach,		vmbus_attach),
164 	DEVMETHOD(device_detach,		vmbus_detach),
165 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
166 	DEVMETHOD(device_suspend,		bus_generic_suspend),
167 	DEVMETHOD(device_resume,		bus_generic_resume),
168 
169 	/* Bus interface */
170 	DEVMETHOD(bus_add_child,		bus_generic_add_child),
171 	DEVMETHOD(bus_print_child,		bus_generic_print_child),
172 	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
173 	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
174 	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
175 	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
176 	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
177 	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
178 	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
179 	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
180 #if __FreeBSD_version >= 1100000
181 	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
182 #endif
183 
184 	/* pcib interface */
185 	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
186 	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
187 	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
188 	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
189 	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
190 
191 	/* Vmbus interface */
192 	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
193 	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
194 	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
195 	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),
196 
197 	DEVMETHOD_END
198 };
199 
200 static driver_t vmbus_driver = {
201 	"vmbus",
202 	vmbus_methods,
203 	sizeof(struct vmbus_softc)
204 };
205 
206 static devclass_t vmbus_devclass;
207 
208 DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
209 DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
210     NULL, NULL);
211 
212 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
213 MODULE_DEPEND(vmbus, pci, 1, 1, 1);
214 MODULE_VERSION(vmbus, 1);
215 
216 static __inline struct vmbus_softc *
217 vmbus_get_softc(void)
218 {
219 	return vmbus_sc;
220 }
221 
222 void
223 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
224 {
225 	struct hypercall_postmsg_in *inprm;
226 
227 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
228 		panic("invalid data size %zu", dsize);
229 
230 	inprm = vmbus_xact_req_data(mh->mh_xact);
231 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
232 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
233 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
234 	inprm->hc_dsize = dsize;
235 }
236 
237 struct vmbus_msghc *
238 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
239 {
240 	struct vmbus_msghc *mh;
241 	struct vmbus_xact *xact;
242 
243 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
244 		panic("invalid data size %zu", dsize);
245 
246 	xact = vmbus_xact_get(sc->vmbus_xc,
247 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
248 	if (xact == NULL)
249 		return (NULL);
250 
251 	mh = vmbus_xact_priv(xact, sizeof(*mh));
252 	mh->mh_xact = xact;
253 
254 	vmbus_msghc_reset(mh, dsize);
255 	return (mh);
256 }
257 
258 void
259 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
260 {
261 
262 	vmbus_xact_put(mh->mh_xact);
263 }
264 
265 void *
266 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
267 {
268 	struct hypercall_postmsg_in *inprm;
269 
270 	inprm = vmbus_xact_req_data(mh->mh_xact);
271 	return (inprm->hc_data);
272 }
273 
274 int
275 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
276 {
277 	sbintime_t time = SBT_1MS;
278 	struct hypercall_postmsg_in *inprm;
279 	bus_addr_t inprm_paddr;
280 	int i;
281 
282 	inprm = vmbus_xact_req_data(mh->mh_xact);
283 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
284 
285 	/*
286 	 * Save the input parameter so that we could restore the input
287 	 * parameter if the Hypercall failed.
288 	 *
289 	 * XXX
290 	 * Is this really necessary?!  i.e. Will the Hypercall ever
291 	 * overwrite the input parameter?
292 	 */
293 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
294 
295 	/*
296 	 * In order to cope with transient failures, e.g. insufficient
297 	 * resources on host side, we retry the post message Hypercall
298 	 * several times.  20 retries seem sufficient.
299 	 */
300 #define HC_RETRY_MAX	20
301 
302 	for (i = 0; i < HC_RETRY_MAX; ++i) {
303 		uint64_t status;
304 
305 		status = hypercall_post_message(inprm_paddr);
306 		if (status == HYPERCALL_STATUS_SUCCESS)
307 			return 0;
308 
309 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
310 		if (time < SBT_1S * 2)
311 			time *= 2;
312 
313 		/* Restore input parameter and try again */
314 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
315 	}
316 
317 #undef HC_RETRY_MAX
318 
319 	return EIO;
320 }
321 
322 int
323 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
324 {
325 	int error;
326 
327 	vmbus_xact_activate(mh->mh_xact);
328 	error = vmbus_msghc_exec_noresult(mh);
329 	if (error)
330 		vmbus_xact_deactivate(mh->mh_xact);
331 	return error;
332 }
333 
334 void
335 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
336 {
337 
338 	vmbus_xact_deactivate(mh->mh_xact);
339 }
340 
341 const struct vmbus_message *
342 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
343 {
344 	size_t resp_len;
345 
346 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
347 }
348 
349 const struct vmbus_message *
350 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
351 {
352 	size_t resp_len;
353 
354 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
355 }
356 
357 void
358 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
359 {
360 
361 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
362 }
363 
364 uint32_t
365 vmbus_gpadl_alloc(struct vmbus_softc *sc)
366 {
367 	uint32_t gpadl;
368 
369 again:
370 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
371 	if (gpadl == 0)
372 		goto again;
373 	return (gpadl);
374 }
375 
376 /* Used for Hyper-V socket when guest client connects to host */
377 int
378 vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
379     struct hyperv_guid *host_srv_id)
380 {
381 	struct vmbus_softc *sc = vmbus_get_softc();
382 	struct vmbus_chanmsg_tl_connect *req;
383 	struct vmbus_msghc *mh;
384 	int error;
385 
386 	if (!sc)
387 		return ENXIO;
388 
389 	mh = vmbus_msghc_get(sc, sizeof(*req));
390 	if (mh == NULL) {
391 		device_printf(sc->vmbus_dev,
392 		    "can not get msg hypercall for tl connect\n");
393 		return ENXIO;
394 	}
395 
396 	req = vmbus_msghc_dataptr(mh);
397 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
398 	req->guest_endpoint_id = *guest_srv_id;
399 	req->host_service_id = *host_srv_id;
400 
401 	error = vmbus_msghc_exec_noresult(mh);
402 	vmbus_msghc_put(sc, mh);
403 
404 	if (error) {
405 		device_printf(sc->vmbus_dev,
406 		    "tl connect msg hypercall failed\n");
407 	}
408 
409 	return error;
410 }
411 
412 static int
413 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
414 {
415 	struct vmbus_chanmsg_connect *req;
416 	const struct vmbus_message *msg;
417 	struct vmbus_msghc *mh;
418 	int error, done = 0;
419 
420 	mh = vmbus_msghc_get(sc, sizeof(*req));
421 	if (mh == NULL)
422 		return ENXIO;
423 
424 	req = vmbus_msghc_dataptr(mh);
425 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
426 	req->chm_ver = version;
427 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
428 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
429 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
430 
431 	error = vmbus_msghc_exec(sc, mh);
432 	if (error) {
433 		vmbus_msghc_put(sc, mh);
434 		return error;
435 	}
436 
437 	msg = vmbus_msghc_wait_result(sc, mh);
438 	done = ((const struct vmbus_chanmsg_connect_resp *)
439 	    msg->msg_data)->chm_done;
440 
441 	vmbus_msghc_put(sc, mh);
442 
443 	return (done ? 0 : EOPNOTSUPP);
444 }
445 
446 static int
447 vmbus_init(struct vmbus_softc *sc)
448 {
449 	int i;
450 
451 	for (i = 0; i < nitems(vmbus_version); ++i) {
452 		int error;
453 
454 		error = vmbus_connect(sc, vmbus_version[i]);
455 		if (!error) {
456 			vmbus_current_version = vmbus_version[i];
457 			sc->vmbus_version = vmbus_version[i];
458 			device_printf(sc->vmbus_dev, "version %u.%u\n",
459 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
460 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
461 			return 0;
462 		}
463 	}
464 	return ENXIO;
465 }
466 
467 static void
468 vmbus_disconnect(struct vmbus_softc *sc)
469 {
470 	struct vmbus_chanmsg_disconnect *req;
471 	struct vmbus_msghc *mh;
472 	int error;
473 
474 	mh = vmbus_msghc_get(sc, sizeof(*req));
475 	if (mh == NULL) {
476 		device_printf(sc->vmbus_dev,
477 		    "can not get msg hypercall for disconnect\n");
478 		return;
479 	}
480 
481 	req = vmbus_msghc_dataptr(mh);
482 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
483 
484 	error = vmbus_msghc_exec_noresult(mh);
485 	vmbus_msghc_put(sc, mh);
486 
487 	if (error) {
488 		device_printf(sc->vmbus_dev,
489 		    "disconnect msg hypercall failed\n");
490 	}
491 }
492 
493 static int
494 vmbus_req_channels(struct vmbus_softc *sc)
495 {
496 	struct vmbus_chanmsg_chrequest *req;
497 	struct vmbus_msghc *mh;
498 	int error;
499 
500 	mh = vmbus_msghc_get(sc, sizeof(*req));
501 	if (mh == NULL)
502 		return ENXIO;
503 
504 	req = vmbus_msghc_dataptr(mh);
505 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
506 
507 	error = vmbus_msghc_exec_noresult(mh);
508 	vmbus_msghc_put(sc, mh);
509 
510 	return error;
511 }
512 
513 static void
514 vmbus_scan_done_task(void *xsc, int pending __unused)
515 {
516 	struct vmbus_softc *sc = xsc;
517 
518 	mtx_lock(&Giant);
519 	sc->vmbus_scandone = true;
520 	mtx_unlock(&Giant);
521 	wakeup(&sc->vmbus_scandone);
522 }
523 
524 static void
525 vmbus_scan_done(struct vmbus_softc *sc,
526     const struct vmbus_message *msg __unused)
527 {
528 
529 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
530 }
531 
532 static int
533 vmbus_scan(struct vmbus_softc *sc)
534 {
535 	int error;
536 
537 	/*
538 	 * Identify, probe and attach for non-channel devices.
539 	 */
540 	bus_generic_probe(sc->vmbus_dev);
541 	bus_generic_attach(sc->vmbus_dev);
542 
543 	/*
544 	 * This taskqueue serializes vmbus devices' attach and detach
545 	 * for channel offer and rescind messages.
546 	 */
547 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
548 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
549 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
550 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
551 
552 	/*
553 	 * This taskqueue handles sub-channel detach, so that vmbus
554 	 * device's detach running in vmbus_devtq can drain its sub-
555 	 * channels.
556 	 */
557 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
558 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
559 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
560 
561 	/*
562 	 * Start vmbus scanning.
563 	 */
564 	error = vmbus_req_channels(sc);
565 	if (error) {
566 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
567 		    error);
568 		return (error);
569 	}
570 
571 	/*
572 	 * Wait for all vmbus devices from the initial channel offers to be
573 	 * attached.
574 	 */
575 	GIANT_REQUIRED;
576 	while (!sc->vmbus_scandone)
577 		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
578 
579 	if (bootverbose) {
580 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
581 		    "done\n");
582 	}
583 	return (0);
584 }
585 
586 static void
587 vmbus_scan_teardown(struct vmbus_softc *sc)
588 {
589 
590 	GIANT_REQUIRED;
591 	if (sc->vmbus_devtq != NULL) {
592 		mtx_unlock(&Giant);
593 		taskqueue_free(sc->vmbus_devtq);
594 		mtx_lock(&Giant);
595 		sc->vmbus_devtq = NULL;
596 	}
597 	if (sc->vmbus_subchtq != NULL) {
598 		mtx_unlock(&Giant);
599 		taskqueue_free(sc->vmbus_subchtq);
600 		mtx_lock(&Giant);
601 		sc->vmbus_subchtq = NULL;
602 	}
603 }
604 
605 static void
606 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
607 {
608 	vmbus_chanmsg_proc_t msg_proc;
609 	uint32_t msg_type;
610 
611 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
612 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
613 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
614 		    msg_type);
615 		return;
616 	}
617 
618 	msg_proc = vmbus_chanmsg_handlers[msg_type];
619 	if (msg_proc != NULL)
620 		msg_proc(sc, msg);
621 
622 	/* Channel specific processing */
623 	vmbus_chan_msgproc(sc, msg);
624 }
625 
626 static void
627 vmbus_msg_task(void *xsc, int pending __unused)
628 {
629 	struct vmbus_softc *sc = xsc;
630 	volatile struct vmbus_message *msg;
631 
632 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
633 	for (;;) {
634 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
635 			/* No message */
636 			break;
637 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
638 			/* Channel message */
639 			vmbus_chanmsg_handle(sc,
640 			    __DEVOLATILE(const struct vmbus_message *, msg));
641 		}
642 
643 		msg->msg_type = HYPERV_MSGTYPE_NONE;
644 		/*
645 		 * Make sure the write to msg_type (i.e. set to
646 		 * HYPERV_MSGTYPE_NONE) happens before we read the
647 		 * msg_flags and EOMing. Otherwise, the EOMing will
648 		 * not deliver any more messages since there is no
649 		 * empty slot
650 		 *
651 		 * NOTE:
652 		 * mb() is used here, since atomic_thread_fence_seq_cst()
653 		 * will become compiler fence on UP kernel.
654 		 */
655 		mb();
656 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
657 			/*
658 			 * This will cause message queue rescan to possibly
659 			 * deliver another msg from the hypervisor
660 			 */
661 			wrmsr(MSR_HV_EOM, 0);
662 		}
663 	}
664 }
665 
666 static __inline int
667 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
668 {
669 	volatile struct vmbus_message *msg;
670 	struct vmbus_message *msg_base;
671 
672 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
673 
674 	/*
675 	 * Check event timer.
676 	 *
677 	 * TODO: move this to independent IDT vector.
678 	 */
679 	msg = msg_base + VMBUS_SINT_TIMER;
680 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
681 		msg->msg_type = HYPERV_MSGTYPE_NONE;
682 
683 		vmbus_et_intr(frame);
684 
685 		/*
686 		 * Make sure the write to msg_type (i.e. set to
687 		 * HYPERV_MSGTYPE_NONE) happens before we read the
688 		 * msg_flags and EOMing. Otherwise, the EOMing will
689 		 * not deliver any more messages since there is no
690 		 * empty slot
691 		 *
692 		 * NOTE:
693 		 * mb() is used here, since atomic_thread_fence_seq_cst()
694 		 * will become compiler fence on UP kernel.
695 		 */
696 		mb();
697 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
698 			/*
699 			 * This will cause message queue rescan to possibly
700 			 * deliver another msg from the hypervisor
701 			 */
702 			wrmsr(MSR_HV_EOM, 0);
703 		}
704 	}
705 
706 	/*
707 	 * Check events.  Hot path for network and storage I/O data; high rate.
708 	 *
709 	 * NOTE:
710 	 * As recommended by the Windows guest fellows, we check events before
711 	 * checking messages.
712 	 */
713 	sc->vmbus_event_proc(sc, cpu);
714 
715 	/*
716 	 * Check messages.  Mainly management stuffs; ultra low rate.
717 	 */
718 	msg = msg_base + VMBUS_SINT_MESSAGE;
719 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
720 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
721 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
722 	}
723 
724 	return (FILTER_HANDLED);
725 }
726 
727 void
728 vmbus_handle_intr(struct trapframe *trap_frame)
729 {
730 	struct vmbus_softc *sc = vmbus_get_softc();
731 	int cpu = curcpu;
732 
733 	/*
734 	 * Disable preemption.
735 	 */
736 	critical_enter();
737 
738 	/*
739 	 * Do a little interrupt counting.
740 	 */
741 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
742 
743 	vmbus_handle_intr1(sc, trap_frame, cpu);
744 
745 	/*
746 	 * Enable preemption.
747 	 */
748 	critical_exit();
749 }
750 
751 static void
752 vmbus_synic_setup(void *xsc)
753 {
754 	struct vmbus_softc *sc = xsc;
755 	int cpu = curcpu;
756 	uint64_t val, orig;
757 	uint32_t sint;
758 
759 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
760 		/* Save virtual processor id. */
761 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
762 	} else {
763 		/* Set virtual processor id to 0 for compatibility. */
764 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
765 	}
766 
767 	/*
768 	 * Setup the SynIC message.
769 	 */
770 	orig = rdmsr(MSR_HV_SIMP);
771 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
772 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
773 	     MSR_HV_SIMP_PGSHIFT);
774 	wrmsr(MSR_HV_SIMP, val);
775 
776 	/*
777 	 * Setup the SynIC event flags.
778 	 */
779 	orig = rdmsr(MSR_HV_SIEFP);
780 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
781 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
782 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
783 	wrmsr(MSR_HV_SIEFP, val);
784 
785 
786 	/*
787 	 * Configure and unmask SINT for message and event flags.
788 	 */
789 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
790 	orig = rdmsr(sint);
791 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
792 	    (orig & MSR_HV_SINT_RSVD_MASK);
793 	wrmsr(sint, val);
794 
795 	/*
796 	 * Configure and unmask SINT for timer.
797 	 */
798 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
799 	orig = rdmsr(sint);
800 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
801 	    (orig & MSR_HV_SINT_RSVD_MASK);
802 	wrmsr(sint, val);
803 
804 	/*
805 	 * All done; enable SynIC.
806 	 */
807 	orig = rdmsr(MSR_HV_SCONTROL);
808 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
809 	wrmsr(MSR_HV_SCONTROL, val);
810 }
811 
812 static void
813 vmbus_synic_teardown(void *arg)
814 {
815 	uint64_t orig;
816 	uint32_t sint;
817 
818 	/*
819 	 * Disable SynIC.
820 	 */
821 	orig = rdmsr(MSR_HV_SCONTROL);
822 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
823 
824 	/*
825 	 * Mask message and event flags SINT.
826 	 */
827 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
828 	orig = rdmsr(sint);
829 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
830 
831 	/*
832 	 * Mask timer SINT.
833 	 */
834 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
835 	orig = rdmsr(sint);
836 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
837 
838 	/*
839 	 * Teardown SynIC message.
840 	 */
841 	orig = rdmsr(MSR_HV_SIMP);
842 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
843 
844 	/*
845 	 * Teardown SynIC event flags.
846 	 */
847 	orig = rdmsr(MSR_HV_SIEFP);
848 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
849 }
850 
851 static int
852 vmbus_dma_alloc(struct vmbus_softc *sc)
853 {
854 	bus_dma_tag_t parent_dtag;
855 	uint8_t *evtflags;
856 	int cpu;
857 
858 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
859 	CPU_FOREACH(cpu) {
860 		void *ptr;
861 
862 		/*
863 		 * Per-cpu messages and event flags.
864 		 */
865 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
866 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
867 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
868 		if (ptr == NULL)
869 			return ENOMEM;
870 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
871 
872 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
873 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
874 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
875 		if (ptr == NULL)
876 			return ENOMEM;
877 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
878 	}
879 
880 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
881 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
882 	if (evtflags == NULL)
883 		return ENOMEM;
884 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
885 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
886 	sc->vmbus_evtflags = evtflags;
887 
888 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
889 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
890 	if (sc->vmbus_mnf1 == NULL)
891 		return ENOMEM;
892 
893 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
894 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
895 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
896 	if (sc->vmbus_mnf2 == NULL)
897 		return ENOMEM;
898 
899 	return 0;
900 }
901 
902 static void
903 vmbus_dma_free(struct vmbus_softc *sc)
904 {
905 	int cpu;
906 
907 	if (sc->vmbus_evtflags != NULL) {
908 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
909 		sc->vmbus_evtflags = NULL;
910 		sc->vmbus_rx_evtflags = NULL;
911 		sc->vmbus_tx_evtflags = NULL;
912 	}
913 	if (sc->vmbus_mnf1 != NULL) {
914 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
915 		sc->vmbus_mnf1 = NULL;
916 	}
917 	if (sc->vmbus_mnf2 != NULL) {
918 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
919 		sc->vmbus_mnf2 = NULL;
920 	}
921 
922 	CPU_FOREACH(cpu) {
923 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
924 			hyperv_dmamem_free(
925 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
926 			    VMBUS_PCPU_GET(sc, message, cpu));
927 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
928 		}
929 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
930 			hyperv_dmamem_free(
931 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
932 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
933 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
934 		}
935 	}
936 }
937 
938 static int
939 vmbus_intr_setup(struct vmbus_softc *sc)
940 {
941 	int cpu;
942 
943 	CPU_FOREACH(cpu) {
944 		char buf[MAXCOMLEN + 1];
945 		cpuset_t cpu_mask;
946 
947 		/* Allocate an interrupt counter for Hyper-V interrupt */
948 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
949 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
950 
951 		/*
952 		 * Setup taskqueue to handle events.  Task will be per-
953 		 * channel.
954 		 */
955 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
956 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
957 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
958 		if (vmbus_pin_evttask) {
959 			CPU_SETOF(cpu, &cpu_mask);
960 			taskqueue_start_threads_cpuset(
961 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
962 			    &cpu_mask, "hvevent%d", cpu);
963 		} else {
964 			taskqueue_start_threads(
965 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
966 			    "hvevent%d", cpu);
967 		}
968 
969 		/*
970 		 * Setup tasks and taskqueues to handle messages.
971 		 */
972 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
973 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
974 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
975 		CPU_SETOF(cpu, &cpu_mask);
976 		taskqueue_start_threads_cpuset(
977 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
978 		    "hvmsg%d", cpu);
979 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
980 		    vmbus_msg_task, sc);
981 	}
982 
983 	/*
984 	 * All Hyper-V ISR required resources are setup, now let's find a
985 	 * free IDT vector for Hyper-V ISR and set it up.
986 	 */
987 	sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
988 	    IDTVEC(vmbus_isr));
989 	if (sc->vmbus_idtvec < 0) {
990 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
991 		return ENXIO;
992 	}
993 	if (bootverbose) {
994 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
995 		    sc->vmbus_idtvec);
996 	}
997 	return 0;
998 }
999 
1000 static void
1001 vmbus_intr_teardown(struct vmbus_softc *sc)
1002 {
1003 	int cpu;
1004 
1005 	if (sc->vmbus_idtvec >= 0) {
1006 		lapic_ipi_free(sc->vmbus_idtvec);
1007 		sc->vmbus_idtvec = -1;
1008 	}
1009 
1010 	CPU_FOREACH(cpu) {
1011 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1012 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1013 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1014 		}
1015 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1016 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1017 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1018 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1019 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1020 		}
1021 	}
1022 }
1023 
1024 static int
1025 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1026 {
1027 	return (ENOENT);
1028 }
1029 
1030 static int
1031 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
1032 {
1033 	const struct vmbus_channel *chan;
1034 	char guidbuf[HYPERV_GUID_STRLEN];
1035 
1036 	chan = vmbus_get_channel(child);
1037 	if (chan == NULL) {
1038 		/* Event timer device, which does not belong to a channel */
1039 		return (0);
1040 	}
1041 
1042 	strlcat(buf, "classid=", buflen);
1043 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1044 	strlcat(buf, guidbuf, buflen);
1045 
1046 	strlcat(buf, " deviceid=", buflen);
1047 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1048 	strlcat(buf, guidbuf, buflen);
1049 
1050 	return (0);
1051 }
1052 
1053 int
1054 vmbus_add_child(struct vmbus_channel *chan)
1055 {
1056 	struct vmbus_softc *sc = chan->ch_vmbus;
1057 	device_t parent = sc->vmbus_dev;
1058 
1059 	mtx_lock(&Giant);
1060 
1061 	chan->ch_dev = device_add_child(parent, NULL, -1);
1062 	if (chan->ch_dev == NULL) {
1063 		mtx_unlock(&Giant);
1064 		device_printf(parent, "device_add_child for chan%u failed\n",
1065 		    chan->ch_id);
1066 		return (ENXIO);
1067 	}
1068 	device_set_ivars(chan->ch_dev, chan);
1069 	device_probe_and_attach(chan->ch_dev);
1070 
1071 	mtx_unlock(&Giant);
1072 	return (0);
1073 }
1074 
1075 int
1076 vmbus_delete_child(struct vmbus_channel *chan)
1077 {
1078 	int error = 0;
1079 
1080 	mtx_lock(&Giant);
1081 	if (chan->ch_dev != NULL) {
1082 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1083 		    chan->ch_dev);
1084 		chan->ch_dev = NULL;
1085 	}
1086 	mtx_unlock(&Giant);
1087 	return (error);
1088 }
1089 
1090 static int
1091 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1092 {
1093 	struct vmbus_softc *sc = arg1;
1094 	char verstr[16];
1095 
1096 	snprintf(verstr, sizeof(verstr), "%u.%u",
1097 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1098 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1099 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1100 }
1101 
1102 /*
1103  * We need the function to make sure the MMIO resource is allocated from the
1104  * ranges found in _CRS.
1105  *
1106  * For the release function, we can use bus_generic_release_resource().
1107  */
1108 static struct resource *
1109 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1110     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1111 {
1112 	device_t parent = device_get_parent(dev);
1113 	struct resource *res;
1114 
1115 #ifdef NEW_PCIB
1116 	if (type == SYS_RES_MEMORY) {
1117 		struct vmbus_softc *sc = device_get_softc(dev);
1118 
1119 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1120 		    rid, start, end, count, flags);
1121 	} else
1122 #endif
1123 	{
1124 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1125 		    end, count, flags);
1126 	}
1127 
1128 	return (res);
1129 }
1130 
1131 static int
1132 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1133 {
1134 
1135 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1136 	    irqs));
1137 }
1138 
1139 static int
1140 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1141 {
1142 
1143 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1144 }
1145 
1146 static int
1147 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1148 {
1149 
1150 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1151 }
1152 
1153 static int
1154 vmbus_release_msix(device_t bus, device_t dev, int irq)
1155 {
1156 
1157 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1158 }
1159 
1160 static int
1161 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1162 	uint32_t *data)
1163 {
1164 
1165 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1166 }
1167 
1168 static uint32_t
1169 vmbus_get_version_method(device_t bus, device_t dev)
1170 {
1171 	struct vmbus_softc *sc = device_get_softc(bus);
1172 
1173 	return sc->vmbus_version;
1174 }
1175 
1176 static int
1177 vmbus_probe_guid_method(device_t bus, device_t dev,
1178     const struct hyperv_guid *guid)
1179 {
1180 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1181 
1182 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1183 		return 0;
1184 	return ENXIO;
1185 }
1186 
1187 static uint32_t
1188 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1189 {
1190 	const struct vmbus_softc *sc = device_get_softc(bus);
1191 
1192 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1193 }
1194 
1195 static struct taskqueue *
1196 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1197 {
1198 	const struct vmbus_softc *sc = device_get_softc(bus);
1199 
1200 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1201 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1202 }
1203 
1204 #ifdef NEW_PCIB
/* Address reserved for the vTPM; _CRS ranges crossing it are cut short. */
#define VTPM_BASE_ADDR 0xfed40000
/* 2^32: ranges starting below this belong to the 32-bit pass. */
#define FOUR_GB (1ULL << 32)

/*
 * Which ranges a single _CRS walk records: those starting at or above
 * FOUR_GB (parse_64) or those starting below it (parse_32).
 */
enum parse_pass { parse_64, parse_32 };

/* Context handed to parse_crs() for one walk over a _CRS. */
struct parse_context {
	device_t vmbus_dev;	/* device whose softc collects the ranges */
	enum parse_pass pass;	/* pass currently being performed */
};
1214 
1215 static ACPI_STATUS
1216 parse_crs(ACPI_RESOURCE *res, void *ctx)
1217 {
1218 	const struct parse_context *pc = ctx;
1219 	device_t vmbus_dev = pc->vmbus_dev;
1220 
1221 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1222 	UINT64 start, end;
1223 
1224 	switch (res->Type) {
1225 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1226 		start = res->Data.Address32.Address.Minimum;
1227 		end = res->Data.Address32.Address.Maximum;
1228 		break;
1229 
1230 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1231 		start = res->Data.Address64.Address.Minimum;
1232 		end = res->Data.Address64.Address.Maximum;
1233 		break;
1234 
1235 	default:
1236 		/* Unused types. */
1237 		return (AE_OK);
1238 	}
1239 
1240 	/*
1241 	 * We don't use <1MB addresses.
1242 	 */
1243 	if (end < 0x100000)
1244 		return (AE_OK);
1245 
1246 	/* Don't conflict with vTPM. */
1247 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1248 		end = VTPM_BASE_ADDR - 1;
1249 
1250 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1251 	    (pc->pass == parse_64 && start >= FOUR_GB))
1252 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1253 		    start, end, 0);
1254 
1255 	return (AE_OK);
1256 }
1257 
1258 static void
1259 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1260 {
1261 	struct parse_context pc;
1262 	ACPI_STATUS status;
1263 
1264 	if (bootverbose)
1265 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1266 
1267 	pc.vmbus_dev = vmbus_dev;
1268 	pc.pass = pass;
1269 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1270 			parse_crs, &pc);
1271 
1272 	if (bootverbose && ACPI_FAILURE(status))
1273 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1274 }
1275 
1276 static void
1277 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1278 {
1279 	device_t acpi0, parent;
1280 
1281 	parent = device_get_parent(dev);
1282 
1283 	acpi0 = device_get_parent(parent);
1284 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1285 		device_t *children;
1286 		int count;
1287 
1288 		/*
1289 		 * Try to locate VMBUS resources and find _CRS on them.
1290 		 */
1291 		if (device_get_children(acpi0, &children, &count) == 0) {
1292 			int i;
1293 
1294 			for (i = 0; i < count; ++i) {
1295 				if (!device_is_attached(children[i]))
1296 					continue;
1297 
1298 				if (strcmp("vmbus_res",
1299 				    device_get_name(children[i])) == 0)
1300 					vmbus_get_crs(children[i], dev, pass);
1301 			}
1302 			free(children, M_TEMP);
1303 		}
1304 
1305 		/*
1306 		 * Try to find _CRS on acpi.
1307 		 */
1308 		vmbus_get_crs(acpi0, dev, pass);
1309 	} else {
1310 		device_printf(dev, "not grandchild of acpi\n");
1311 	}
1312 
1313 	/*
1314 	 * Try to find _CRS on parent.
1315 	 */
1316 	vmbus_get_crs(parent, dev, pass);
1317 }
1318 
1319 static void
1320 vmbus_get_mmio_res(device_t dev)
1321 {
1322 	struct vmbus_softc *sc = device_get_softc(dev);
1323 	/*
1324 	 * We walk the resources twice to make sure that: in the resource
1325 	 * list, the 32-bit resources appear behind the 64-bit resources.
1326 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1327 	 * iterate through the list to find a range for a 64-bit BAR in
1328 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1329 	 * ranges first.
1330 	 */
1331 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1332 
1333 	vmbus_get_mmio_res_pass(dev, parse_64);
1334 	vmbus_get_mmio_res_pass(dev, parse_32);
1335 }
1336 
/*
 * On Gen2 VMs, Hyper-V provides an MMIO range for the framebuffer.
 * This range is not usable by other PCI devices.  Currently the efifb
 * driver is the only user of the range, and it does not reserve it from
 * the system.
 * Therefore the vmbus driver reserves it before any other PCI device
 * driver starts to request MMIO addresses.
 *
 * hv_fb_res holds that reservation; it stays NULL when nothing was
 * reserved.
 */
static struct resource *hv_fb_res;
1346 
1347 static void
1348 vmbus_fb_mmio_res(device_t dev)
1349 {
1350 	struct efi_fb *efifb;
1351 	caddr_t kmdp;
1352 
1353 	struct vmbus_softc *sc = device_get_softc(dev);
1354 	int rid = 0;
1355 
1356 	kmdp = preload_search_by_type("elf kernel");
1357 	if (kmdp == NULL)
1358 		kmdp = preload_search_by_type("elf64 kernel");
1359 	efifb = (struct efi_fb *)preload_search_info(kmdp,
1360 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
1361 	if (efifb == NULL) {
1362 		if (bootverbose)
1363 			device_printf(dev,
1364 			    "fb has no preloaded kernel efi information\n");
1365 		/* We are on Gen1 VM, just return. */
1366 		return;
1367 	} else {
1368 		if (bootverbose)
1369 			device_printf(dev,
1370 			    "efifb: fb_addr: %#jx, size: %#jx, "
1371 			    "actual size needed: 0x%x\n",
1372 			    efifb->fb_addr, efifb->fb_size,
1373 			    (int) efifb->fb_height * efifb->fb_width);
1374 	}
1375 
1376 	hv_fb_res = pcib_host_res_alloc(&sc->vmbus_mmio_res, dev,
1377 	    SYS_RES_MEMORY, &rid,
1378 	    efifb->fb_addr, efifb->fb_addr + efifb->fb_size, efifb->fb_size,
1379 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1380 
1381 	if (hv_fb_res && bootverbose)
1382 		device_printf(dev,
1383 		    "successfully reserved memory for framebuffer "
1384 		    "starting at %#jx, size %#jx\n",
1385 		    efifb->fb_addr, efifb->fb_size);
1386 }
1387 
1388 static void
1389 vmbus_free_mmio_res(device_t dev)
1390 {
1391 	struct vmbus_softc *sc = device_get_softc(dev);
1392 
1393 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1394 
1395 	if (hv_fb_res)
1396 		hv_fb_res = NULL;
1397 }
1398 #endif	/* NEW_PCIB */
1399 
1400 static void
1401 vmbus_identify(driver_t *driver, device_t parent)
1402 {
1403 
1404 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1405 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1406 		return;
1407 	device_add_child(parent, "vmbus", -1);
1408 }
1409 
1410 static int
1411 vmbus_probe(device_t dev)
1412 {
1413 
1414 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1415 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1416 		return (ENXIO);
1417 
1418 	device_set_desc(dev, "Hyper-V Vmbus");
1419 	return (BUS_PROBE_DEFAULT);
1420 }
1421 
1422 /**
1423  * @brief Main vmbus driver initialization routine.
1424  *
1425  * Here, we
1426  * - initialize the vmbus driver context
1427  * - setup various driver entry points
1428  * - invoke the vmbus hv main init routine
1429  * - get the irq resource
1430  * - invoke the vmbus to add the vmbus root device
1431  * - setup the vmbus root device
1432  * - retrieve the channel offers
1433  */
1434 static int
1435 vmbus_doattach(struct vmbus_softc *sc)
1436 {
1437 	struct sysctl_oid_list *child;
1438 	struct sysctl_ctx_list *ctx;
1439 	int ret;
1440 
1441 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1442 		return (0);
1443 
1444 #ifdef NEW_PCIB
1445 	vmbus_get_mmio_res(sc->vmbus_dev);
1446 	vmbus_fb_mmio_res(sc->vmbus_dev);
1447 #endif
1448 
1449 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1450 
1451 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1452 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1453 	TAILQ_INIT(&sc->vmbus_prichans);
1454 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1455 	TAILQ_INIT(&sc->vmbus_chans);
1456 	sc->vmbus_chmap = malloc(
1457 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1458 	    M_WAITOK | M_ZERO);
1459 
1460 	/*
1461 	 * Create context for "post message" Hypercalls
1462 	 */
1463 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1464 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1465 	    sizeof(struct vmbus_msghc));
1466 	if (sc->vmbus_xc == NULL) {
1467 		ret = ENXIO;
1468 		goto cleanup;
1469 	}
1470 
1471 	/*
1472 	 * Allocate DMA stuffs.
1473 	 */
1474 	ret = vmbus_dma_alloc(sc);
1475 	if (ret != 0)
1476 		goto cleanup;
1477 
1478 	/*
1479 	 * Setup interrupt.
1480 	 */
1481 	ret = vmbus_intr_setup(sc);
1482 	if (ret != 0)
1483 		goto cleanup;
1484 
1485 	/*
1486 	 * Setup SynIC.
1487 	 */
1488 	if (bootverbose)
1489 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1490 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1491 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1492 
1493 	/*
1494 	 * Initialize vmbus, e.g. connect to Hypervisor.
1495 	 */
1496 	ret = vmbus_init(sc);
1497 	if (ret != 0)
1498 		goto cleanup;
1499 
1500 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1501 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1502 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1503 	else
1504 		sc->vmbus_event_proc = vmbus_event_proc;
1505 
1506 	ret = vmbus_scan(sc);
1507 	if (ret != 0)
1508 		goto cleanup;
1509 
1510 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1511 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1512 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1513 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1514 	    vmbus_sysctl_version, "A", "vmbus version");
1515 
1516 	return (ret);
1517 
1518 cleanup:
1519 	vmbus_scan_teardown(sc);
1520 	vmbus_intr_teardown(sc);
1521 	vmbus_dma_free(sc);
1522 	if (sc->vmbus_xc != NULL) {
1523 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1524 		sc->vmbus_xc = NULL;
1525 	}
1526 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1527 	mtx_destroy(&sc->vmbus_prichan_lock);
1528 	mtx_destroy(&sc->vmbus_chan_lock);
1529 
1530 	return (ret);
1531 }
1532 
1533 static void
1534 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1535 {
1536 }
1537 
1538 #ifdef EARLY_AP_STARTUP
1539 
1540 static void
1541 vmbus_intrhook(void *xsc)
1542 {
1543 	struct vmbus_softc *sc = xsc;
1544 
1545 	if (bootverbose)
1546 		device_printf(sc->vmbus_dev, "intrhook\n");
1547 	vmbus_doattach(sc);
1548 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1549 }
1550 
1551 #endif	/* EARLY_AP_STARTUP */
1552 
1553 static int
1554 vmbus_attach(device_t dev)
1555 {
1556 	vmbus_sc = device_get_softc(dev);
1557 	vmbus_sc->vmbus_dev = dev;
1558 	vmbus_sc->vmbus_idtvec = -1;
1559 
1560 	/*
1561 	 * Event processing logic will be configured:
1562 	 * - After the vmbus protocol version negotiation.
1563 	 * - Before we request channel offers.
1564 	 */
1565 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1566 
1567 #ifdef EARLY_AP_STARTUP
1568 	/*
1569 	 * Defer the real attach until the pause(9) works as expected.
1570 	 */
1571 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1572 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1573 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1574 #else	/* !EARLY_AP_STARTUP */
1575 	/*
1576 	 * If the system has already booted and thread
1577 	 * scheduling is possible indicated by the global
1578 	 * cold set to zero, we just call the driver
1579 	 * initialization directly.
1580 	 */
1581 	if (!cold)
1582 		vmbus_doattach(vmbus_sc);
1583 #endif	/* EARLY_AP_STARTUP */
1584 
1585 	return (0);
1586 }
1587 
1588 static int
1589 vmbus_detach(device_t dev)
1590 {
1591 	struct vmbus_softc *sc = device_get_softc(dev);
1592 
1593 	bus_generic_detach(dev);
1594 	vmbus_chan_destroy_all(sc);
1595 
1596 	vmbus_scan_teardown(sc);
1597 
1598 	vmbus_disconnect(sc);
1599 
1600 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1601 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1602 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1603 	}
1604 
1605 	vmbus_intr_teardown(sc);
1606 	vmbus_dma_free(sc);
1607 
1608 	if (sc->vmbus_xc != NULL) {
1609 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1610 		sc->vmbus_xc = NULL;
1611 	}
1612 
1613 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1614 	mtx_destroy(&sc->vmbus_prichan_lock);
1615 	mtx_destroy(&sc->vmbus_chan_lock);
1616 
1617 #ifdef NEW_PCIB
1618 	vmbus_free_mmio_res(dev);
1619 #endif
1620 
1621 	return (0);
1622 }
1623 
1624 #ifndef EARLY_AP_STARTUP
1625 
1626 static void
1627 vmbus_sysinit(void *arg __unused)
1628 {
1629 	struct vmbus_softc *sc = vmbus_get_softc();
1630 
1631 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1632 		return;
1633 
1634 	/*
1635 	 * If the system has already booted and thread
1636 	 * scheduling is possible, as indicated by the
1637 	 * global cold set to zero, we just call the driver
1638 	 * initialization directly.
1639 	 */
1640 	if (!cold)
1641 		vmbus_doattach(sc);
1642 }
/*
 * NOTE:
 * vmbus_sysinit() has to run as the last step of SI_SUB_SMP, i.e. after
 * SMP has been initialized; hence the SI_ORDER_ANY ordering.
 */
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1649 
1650 #endif	/* !EARLY_AP_STARTUP */
1651