xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/linker.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/sbuf.h>
44 #include <sys/smp.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/taskqueue.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 
53 #include <machine/bus.h>
54 #include <machine/intr_machdep.h>
55 #include <machine/metadata.h>
56 #include <machine/md_var.h>
57 #include <machine/resource.h>
58 #include <x86/include/apicvar.h>
59 
60 #include <contrib/dev/acpica/include/acpi.h>
61 #include <dev/acpica/acpivar.h>
62 
63 #include <dev/hyperv/include/hyperv.h>
64 #include <dev/hyperv/include/vmbus_xact.h>
65 #include <dev/hyperv/vmbus/hyperv_reg.h>
66 #include <dev/hyperv/vmbus/hyperv_var.h>
67 #include <dev/hyperv/vmbus/vmbus_reg.h>
68 #include <dev/hyperv/vmbus/vmbus_var.h>
69 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
70 
71 #include "acpi_if.h"
72 #include "pcib_if.h"
73 #include "vmbus_if.h"
74 
/*
 * NOTE(review): nothing in the visible part of this file uses this macro;
 * presumably it is the initial value of sc->vmbus_gpadl, the counter that
 * vmbus_gpadl_alloc() increments -- confirm against the attach path.
 */
#define VMBUS_GPADL_START		0xe1e10

/*
 * Context of one post-message Hypercall.  It is stored in the private
 * area of the transaction that carries the request, see vmbus_msghc_get().
 */
struct vmbus_msghc {
	/* Transaction backing this context. */
	struct vmbus_xact		*mh_xact;
	/*
	 * Copy of the Hypercall input parameter, taken before the first
	 * attempt and written back before every retry, see
	 * vmbus_msghc_exec_noresult().
	 */
	struct hypercall_postmsg_in	mh_inprm_save;
};
81 
/*
 * Forward declarations.
 */

/* Device and bus interface methods, referenced from vmbus_methods[]. */
static void			vmbus_identify(driver_t *, device_t);
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static int			vmbus_read_ivar(device_t, device_t, int,
				    uintptr_t *);
static int			vmbus_child_pnpinfo(device_t, device_t, struct sbuf *);
static struct resource		*vmbus_alloc_resource(device_t dev,
				    device_t child, int type, int *rid,
				    rman_res_t start, rman_res_t end,
				    rman_res_t count, u_int flags);
/* pcib interface methods, referenced from vmbus_methods[]. */
static int			vmbus_alloc_msi(device_t bus, device_t dev,
				    int count, int maxcount, int *irqs);
static int			vmbus_release_msi(device_t bus, device_t dev,
				    int count, int *irqs);
static int			vmbus_alloc_msix(device_t bus, device_t dev,
				    int *irq);
static int			vmbus_release_msix(device_t bus, device_t dev,
				    int irq);
static int			vmbus_map_msi(device_t bus, device_t dev,
				    int irq, uint64_t *addr, uint32_t *data);
/* vmbus interface methods, referenced from vmbus_methods[]. */
static uint32_t			vmbus_get_version_method(device_t, device_t);
static int			vmbus_probe_guid_method(device_t, device_t,
				    const struct hyperv_guid *);
static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
				    device_t dev, int cpu);
static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
				    int);
#ifdef EARLY_AP_STARTUP
static void			vmbus_intrhook(void *);
#endif

/* Internal helpers: connection, scanning, SynIC, DMA and interrupt setup. */
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_connect(struct vmbus_softc *, uint32_t);
static int			vmbus_req_channels(struct vmbus_softc *sc);
static void			vmbus_disconnect(struct vmbus_softc *);
static int			vmbus_scan(struct vmbus_softc *);
static void			vmbus_scan_teardown(struct vmbus_softc *);
static void			vmbus_scan_done(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_chanmsg_handle(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_msg_task(void *, int);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static int			vmbus_doattach(struct vmbus_softc *);
static void			vmbus_event_proc_dummy(struct vmbus_softc *,
				    int);
135 
/*
 * Softc pointer handed out by vmbus_get_softc().
 * NOTE(review): the assignment is outside the visible part of this file;
 * presumably it is set when the vmbus device attaches -- confirm.
 */
static struct vmbus_softc	*vmbus_sc;

/* The hw.vmbus sysctl node. */
SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V vmbus");

/*
 * Read-only tunable.  When non-zero, vmbus_intr_setup() starts the thread
 * of each per-CPU event taskqueue with a cpuset holding only that CPU.
 */
static int			vmbus_pin_evttask = 1;
SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
    &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");

/*
 * Interrupt entry points; vmbus_intr_setup() installs one of them
 * depending on `pti'.
 */
extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);
/* Address of the vmbus_isr_pti entry point, truncated to its page. */
#define VMBUS_ISR_ADDR	trunc_page((uintptr_t)IDTVEC(vmbus_isr_pti))

/* Set by vmbus_init() to the version that vmbus_connect() succeeded with. */
uint32_t			vmbus_current_version;
149 
/*
 * Protocol versions offered to the host.  vmbus_init() walks this table
 * from the first entry to the last and keeps the first version for which
 * vmbus_connect() succeeds.
 */
static const uint32_t		vmbus_version[] = {
	VMBUS_VERSION_WIN10,
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};
157 
/*
 * Bus-level channel message handlers.  vmbus_chanmsg_handle() indexes this
 * table by the message's chm_type and calls the entry if it is not NULL;
 * message types without an entry here are left to vmbus_chan_msgproc().
 */
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
163 
/*
 * Method table of the vmbus driver.  Entries named bus_generic_* are the
 * generic bus implementations; entries named vmbus_* are implemented in
 * this file.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,		vmbus_identify),
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_child_pnpinfo,		vmbus_child_pnpinfo),
	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
#endif

	/* pcib interface */
	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),

	/* Vmbus interface */
	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),

	DEVMETHOD_END
};
204 
/* Driver description: name, method table and per-device softc size. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* The driver is registered under two parent buses: pcib and acpi_syscontainer. */
DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
    NULL, NULL);

/* Module dependencies and version. */
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_DEPEND(vmbus, pci, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
220 
221 static __inline struct vmbus_softc *
222 vmbus_get_softc(void)
223 {
224 	return vmbus_sc;
225 }
226 
227 void
228 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
229 {
230 	struct hypercall_postmsg_in *inprm;
231 
232 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
233 		panic("invalid data size %zu", dsize);
234 
235 	inprm = vmbus_xact_req_data(mh->mh_xact);
236 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
237 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
238 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
239 	inprm->hc_dsize = dsize;
240 }
241 
242 struct vmbus_msghc *
243 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
244 {
245 	struct vmbus_msghc *mh;
246 	struct vmbus_xact *xact;
247 
248 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
249 		panic("invalid data size %zu", dsize);
250 
251 	xact = vmbus_xact_get(sc->vmbus_xc,
252 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
253 	if (xact == NULL)
254 		return (NULL);
255 
256 	mh = vmbus_xact_priv(xact, sizeof(*mh));
257 	mh->mh_xact = xact;
258 
259 	vmbus_msghc_reset(mh, dsize);
260 	return (mh);
261 }
262 
263 void
264 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
265 {
266 
267 	vmbus_xact_put(mh->mh_xact);
268 }
269 
270 void *
271 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
272 {
273 	struct hypercall_postmsg_in *inprm;
274 
275 	inprm = vmbus_xact_req_data(mh->mh_xact);
276 	return (inprm->hc_data);
277 }
278 
279 int
280 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
281 {
282 	sbintime_t time = SBT_1MS;
283 	struct hypercall_postmsg_in *inprm;
284 	bus_addr_t inprm_paddr;
285 	int i;
286 
287 	inprm = vmbus_xact_req_data(mh->mh_xact);
288 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
289 
290 	/*
291 	 * Save the input parameter so that we could restore the input
292 	 * parameter if the Hypercall failed.
293 	 *
294 	 * XXX
295 	 * Is this really necessary?!  i.e. Will the Hypercall ever
296 	 * overwrite the input parameter?
297 	 */
298 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
299 
300 	/*
301 	 * In order to cope with transient failures, e.g. insufficient
302 	 * resources on host side, we retry the post message Hypercall
303 	 * several times.  20 retries seem sufficient.
304 	 */
305 #define HC_RETRY_MAX	20
306 
307 	for (i = 0; i < HC_RETRY_MAX; ++i) {
308 		uint64_t status;
309 
310 		status = hypercall_post_message(inprm_paddr);
311 		if (status == HYPERCALL_STATUS_SUCCESS)
312 			return 0;
313 
314 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
315 		if (time < SBT_1S * 2)
316 			time *= 2;
317 
318 		/* Restore input parameter and try again */
319 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
320 	}
321 
322 #undef HC_RETRY_MAX
323 
324 	return EIO;
325 }
326 
327 int
328 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
329 {
330 	int error;
331 
332 	vmbus_xact_activate(mh->mh_xact);
333 	error = vmbus_msghc_exec_noresult(mh);
334 	if (error)
335 		vmbus_xact_deactivate(mh->mh_xact);
336 	return error;
337 }
338 
339 void
340 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
341 {
342 
343 	vmbus_xact_deactivate(mh->mh_xact);
344 }
345 
346 const struct vmbus_message *
347 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
348 {
349 	size_t resp_len;
350 
351 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
352 }
353 
354 const struct vmbus_message *
355 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
356 {
357 	size_t resp_len;
358 
359 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
360 }
361 
362 void
363 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
364 {
365 
366 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
367 }
368 
369 uint32_t
370 vmbus_gpadl_alloc(struct vmbus_softc *sc)
371 {
372 	uint32_t gpadl;
373 
374 again:
375 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
376 	if (gpadl == 0)
377 		goto again;
378 	return (gpadl);
379 }
380 
381 /* Used for Hyper-V socket when guest client connects to host */
382 int
383 vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
384     struct hyperv_guid *host_srv_id)
385 {
386 	struct vmbus_softc *sc = vmbus_get_softc();
387 	struct vmbus_chanmsg_tl_connect *req;
388 	struct vmbus_msghc *mh;
389 	int error;
390 
391 	if (!sc)
392 		return ENXIO;
393 
394 	mh = vmbus_msghc_get(sc, sizeof(*req));
395 	if (mh == NULL) {
396 		device_printf(sc->vmbus_dev,
397 		    "can not get msg hypercall for tl connect\n");
398 		return ENXIO;
399 	}
400 
401 	req = vmbus_msghc_dataptr(mh);
402 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
403 	req->guest_endpoint_id = *guest_srv_id;
404 	req->host_service_id = *host_srv_id;
405 
406 	error = vmbus_msghc_exec_noresult(mh);
407 	vmbus_msghc_put(sc, mh);
408 
409 	if (error) {
410 		device_printf(sc->vmbus_dev,
411 		    "tl connect msg hypercall failed\n");
412 	}
413 
414 	return error;
415 }
416 
417 static int
418 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
419 {
420 	struct vmbus_chanmsg_connect *req;
421 	const struct vmbus_message *msg;
422 	struct vmbus_msghc *mh;
423 	int error, done = 0;
424 
425 	mh = vmbus_msghc_get(sc, sizeof(*req));
426 	if (mh == NULL)
427 		return ENXIO;
428 
429 	req = vmbus_msghc_dataptr(mh);
430 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
431 	req->chm_ver = version;
432 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
433 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
434 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
435 
436 	error = vmbus_msghc_exec(sc, mh);
437 	if (error) {
438 		vmbus_msghc_put(sc, mh);
439 		return error;
440 	}
441 
442 	msg = vmbus_msghc_wait_result(sc, mh);
443 	done = ((const struct vmbus_chanmsg_connect_resp *)
444 	    msg->msg_data)->chm_done;
445 
446 	vmbus_msghc_put(sc, mh);
447 
448 	return (done ? 0 : EOPNOTSUPP);
449 }
450 
451 static int
452 vmbus_init(struct vmbus_softc *sc)
453 {
454 	int i;
455 
456 	for (i = 0; i < nitems(vmbus_version); ++i) {
457 		int error;
458 
459 		error = vmbus_connect(sc, vmbus_version[i]);
460 		if (!error) {
461 			vmbus_current_version = vmbus_version[i];
462 			sc->vmbus_version = vmbus_version[i];
463 			device_printf(sc->vmbus_dev, "version %u.%u\n",
464 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
465 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
466 			return 0;
467 		}
468 	}
469 	return ENXIO;
470 }
471 
472 static void
473 vmbus_disconnect(struct vmbus_softc *sc)
474 {
475 	struct vmbus_chanmsg_disconnect *req;
476 	struct vmbus_msghc *mh;
477 	int error;
478 
479 	mh = vmbus_msghc_get(sc, sizeof(*req));
480 	if (mh == NULL) {
481 		device_printf(sc->vmbus_dev,
482 		    "can not get msg hypercall for disconnect\n");
483 		return;
484 	}
485 
486 	req = vmbus_msghc_dataptr(mh);
487 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
488 
489 	error = vmbus_msghc_exec_noresult(mh);
490 	vmbus_msghc_put(sc, mh);
491 
492 	if (error) {
493 		device_printf(sc->vmbus_dev,
494 		    "disconnect msg hypercall failed\n");
495 	}
496 }
497 
498 static int
499 vmbus_req_channels(struct vmbus_softc *sc)
500 {
501 	struct vmbus_chanmsg_chrequest *req;
502 	struct vmbus_msghc *mh;
503 	int error;
504 
505 	mh = vmbus_msghc_get(sc, sizeof(*req));
506 	if (mh == NULL)
507 		return ENXIO;
508 
509 	req = vmbus_msghc_dataptr(mh);
510 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
511 
512 	error = vmbus_msghc_exec_noresult(mh);
513 	vmbus_msghc_put(sc, mh);
514 
515 	return error;
516 }
517 
518 static void
519 vmbus_scan_done_task(void *xsc, int pending __unused)
520 {
521 	struct vmbus_softc *sc = xsc;
522 
523 	bus_topo_lock();
524 	sc->vmbus_scandone = true;
525 	bus_topo_unlock();
526 	wakeup(&sc->vmbus_scandone);
527 }
528 
529 static void
530 vmbus_scan_done(struct vmbus_softc *sc,
531     const struct vmbus_message *msg __unused)
532 {
533 
534 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
535 }
536 
537 static int
538 vmbus_scan(struct vmbus_softc *sc)
539 {
540 	int error;
541 
542 	/*
543 	 * Identify, probe and attach for non-channel devices.
544 	 */
545 	bus_generic_probe(sc->vmbus_dev);
546 	bus_generic_attach(sc->vmbus_dev);
547 
548 	/*
549 	 * This taskqueue serializes vmbus devices' attach and detach
550 	 * for channel offer and rescind messages.
551 	 */
552 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
553 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
554 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
555 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
556 
557 	/*
558 	 * This taskqueue handles sub-channel detach, so that vmbus
559 	 * device's detach running in vmbus_devtq can drain its sub-
560 	 * channels.
561 	 */
562 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
563 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
564 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
565 
566 	/*
567 	 * Start vmbus scanning.
568 	 */
569 	error = vmbus_req_channels(sc);
570 	if (error) {
571 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
572 		    error);
573 		return (error);
574 	}
575 
576 	/*
577 	 * Wait for all vmbus devices from the initial channel offers to be
578 	 * attached.
579 	 */
580 	bus_topo_assert();
581 	while (!sc->vmbus_scandone)
582 		mtx_sleep(&sc->vmbus_scandone, bus_topo_mtx(), 0, "vmbusdev", 0);
583 
584 	if (bootverbose) {
585 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
586 		    "done\n");
587 	}
588 	return (0);
589 }
590 
591 static void
592 vmbus_scan_teardown(struct vmbus_softc *sc)
593 {
594 
595 	bus_topo_assert();
596 	if (sc->vmbus_devtq != NULL) {
597 		bus_topo_unlock();
598 		taskqueue_free(sc->vmbus_devtq);
599 		bus_topo_lock();
600 		sc->vmbus_devtq = NULL;
601 	}
602 	if (sc->vmbus_subchtq != NULL) {
603 		bus_topo_unlock();
604 		taskqueue_free(sc->vmbus_subchtq);
605 		bus_topo_lock();
606 		sc->vmbus_subchtq = NULL;
607 	}
608 }
609 
610 static void
611 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
612 {
613 	vmbus_chanmsg_proc_t msg_proc;
614 	uint32_t msg_type;
615 
616 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
617 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
618 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
619 		    msg_type);
620 		return;
621 	}
622 
623 	msg_proc = vmbus_chanmsg_handlers[msg_type];
624 	if (msg_proc != NULL)
625 		msg_proc(sc, msg);
626 
627 	/* Channel specific processing */
628 	vmbus_chan_msgproc(sc, msg);
629 }
630 
631 static void
632 vmbus_msg_task(void *xsc, int pending __unused)
633 {
634 	struct vmbus_softc *sc = xsc;
635 	volatile struct vmbus_message *msg;
636 
637 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
638 	for (;;) {
639 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
640 			/* No message */
641 			break;
642 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
643 			/* Channel message */
644 			vmbus_chanmsg_handle(sc,
645 			    __DEVOLATILE(const struct vmbus_message *, msg));
646 		}
647 
648 		msg->msg_type = HYPERV_MSGTYPE_NONE;
649 		/*
650 		 * Make sure the write to msg_type (i.e. set to
651 		 * HYPERV_MSGTYPE_NONE) happens before we read the
652 		 * msg_flags and EOMing. Otherwise, the EOMing will
653 		 * not deliver any more messages since there is no
654 		 * empty slot
655 		 *
656 		 * NOTE:
657 		 * mb() is used here, since atomic_thread_fence_seq_cst()
658 		 * will become compiler fence on UP kernel.
659 		 */
660 		mb();
661 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
662 			/*
663 			 * This will cause message queue rescan to possibly
664 			 * deliver another msg from the hypervisor
665 			 */
666 			wrmsr(MSR_HV_EOM, 0);
667 		}
668 	}
669 }
670 
671 static __inline int
672 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
673 {
674 	volatile struct vmbus_message *msg;
675 	struct vmbus_message *msg_base;
676 
677 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
678 
679 	/*
680 	 * Check event timer.
681 	 *
682 	 * TODO: move this to independent IDT vector.
683 	 */
684 	msg = msg_base + VMBUS_SINT_TIMER;
685 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
686 		msg->msg_type = HYPERV_MSGTYPE_NONE;
687 
688 		vmbus_et_intr(frame);
689 
690 		/*
691 		 * Make sure the write to msg_type (i.e. set to
692 		 * HYPERV_MSGTYPE_NONE) happens before we read the
693 		 * msg_flags and EOMing. Otherwise, the EOMing will
694 		 * not deliver any more messages since there is no
695 		 * empty slot
696 		 *
697 		 * NOTE:
698 		 * mb() is used here, since atomic_thread_fence_seq_cst()
699 		 * will become compiler fence on UP kernel.
700 		 */
701 		mb();
702 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
703 			/*
704 			 * This will cause message queue rescan to possibly
705 			 * deliver another msg from the hypervisor
706 			 */
707 			wrmsr(MSR_HV_EOM, 0);
708 		}
709 	}
710 
711 	/*
712 	 * Check events.  Hot path for network and storage I/O data; high rate.
713 	 *
714 	 * NOTE:
715 	 * As recommended by the Windows guest fellows, we check events before
716 	 * checking messages.
717 	 */
718 	sc->vmbus_event_proc(sc, cpu);
719 
720 	/*
721 	 * Check messages.  Mainly management stuffs; ultra low rate.
722 	 */
723 	msg = msg_base + VMBUS_SINT_MESSAGE;
724 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
725 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
726 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
727 	}
728 
729 	return (FILTER_HANDLED);
730 }
731 
732 void
733 vmbus_handle_intr(struct trapframe *trap_frame)
734 {
735 	struct vmbus_softc *sc = vmbus_get_softc();
736 	int cpu = curcpu;
737 
738 	/*
739 	 * Disable preemption.
740 	 */
741 	critical_enter();
742 
743 	/*
744 	 * Do a little interrupt counting.
745 	 */
746 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
747 
748 	vmbus_handle_intr1(sc, trap_frame, cpu);
749 
750 	/*
751 	 * Enable preemption.
752 	 */
753 	critical_exit();
754 }
755 
756 static void
757 vmbus_synic_setup(void *xsc)
758 {
759 	struct vmbus_softc *sc = xsc;
760 	int cpu = curcpu;
761 	uint64_t val, orig;
762 	uint32_t sint;
763 
764 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
765 		/* Save virtual processor id. */
766 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
767 	} else {
768 		/* Set virtual processor id to 0 for compatibility. */
769 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
770 	}
771 
772 	/*
773 	 * Setup the SynIC message.
774 	 */
775 	orig = rdmsr(MSR_HV_SIMP);
776 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
777 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
778 	     MSR_HV_SIMP_PGSHIFT);
779 	wrmsr(MSR_HV_SIMP, val);
780 
781 	/*
782 	 * Setup the SynIC event flags.
783 	 */
784 	orig = rdmsr(MSR_HV_SIEFP);
785 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
786 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
787 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
788 	wrmsr(MSR_HV_SIEFP, val);
789 
790 
791 	/*
792 	 * Configure and unmask SINT for message and event flags.
793 	 */
794 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
795 	orig = rdmsr(sint);
796 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
797 	    (orig & MSR_HV_SINT_RSVD_MASK);
798 	wrmsr(sint, val);
799 
800 	/*
801 	 * Configure and unmask SINT for timer.
802 	 */
803 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
804 	orig = rdmsr(sint);
805 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
806 	    (orig & MSR_HV_SINT_RSVD_MASK);
807 	wrmsr(sint, val);
808 
809 	/*
810 	 * All done; enable SynIC.
811 	 */
812 	orig = rdmsr(MSR_HV_SCONTROL);
813 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
814 	wrmsr(MSR_HV_SCONTROL, val);
815 }
816 
817 static void
818 vmbus_synic_teardown(void *arg)
819 {
820 	uint64_t orig;
821 	uint32_t sint;
822 
823 	/*
824 	 * Disable SynIC.
825 	 */
826 	orig = rdmsr(MSR_HV_SCONTROL);
827 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
828 
829 	/*
830 	 * Mask message and event flags SINT.
831 	 */
832 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
833 	orig = rdmsr(sint);
834 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
835 
836 	/*
837 	 * Mask timer SINT.
838 	 */
839 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
840 	orig = rdmsr(sint);
841 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
842 
843 	/*
844 	 * Teardown SynIC message.
845 	 */
846 	orig = rdmsr(MSR_HV_SIMP);
847 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
848 
849 	/*
850 	 * Teardown SynIC event flags.
851 	 */
852 	orig = rdmsr(MSR_HV_SIEFP);
853 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
854 }
855 
856 static int
857 vmbus_dma_alloc(struct vmbus_softc *sc)
858 {
859 	bus_dma_tag_t parent_dtag;
860 	uint8_t *evtflags;
861 	int cpu;
862 
863 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
864 	CPU_FOREACH(cpu) {
865 		void *ptr;
866 
867 		/*
868 		 * Per-cpu messages and event flags.
869 		 */
870 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
871 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
872 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
873 		if (ptr == NULL)
874 			return ENOMEM;
875 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
876 
877 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
878 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
879 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
880 		if (ptr == NULL)
881 			return ENOMEM;
882 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
883 	}
884 
885 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
886 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
887 	if (evtflags == NULL)
888 		return ENOMEM;
889 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
890 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
891 	sc->vmbus_evtflags = evtflags;
892 
893 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
894 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
895 	if (sc->vmbus_mnf1 == NULL)
896 		return ENOMEM;
897 
898 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
899 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
900 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
901 	if (sc->vmbus_mnf2 == NULL)
902 		return ENOMEM;
903 
904 	return 0;
905 }
906 
907 static void
908 vmbus_dma_free(struct vmbus_softc *sc)
909 {
910 	int cpu;
911 
912 	if (sc->vmbus_evtflags != NULL) {
913 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
914 		sc->vmbus_evtflags = NULL;
915 		sc->vmbus_rx_evtflags = NULL;
916 		sc->vmbus_tx_evtflags = NULL;
917 	}
918 	if (sc->vmbus_mnf1 != NULL) {
919 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
920 		sc->vmbus_mnf1 = NULL;
921 	}
922 	if (sc->vmbus_mnf2 != NULL) {
923 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
924 		sc->vmbus_mnf2 = NULL;
925 	}
926 
927 	CPU_FOREACH(cpu) {
928 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
929 			hyperv_dmamem_free(
930 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
931 			    VMBUS_PCPU_GET(sc, message, cpu));
932 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
933 		}
934 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
935 			hyperv_dmamem_free(
936 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
937 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
938 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
939 		}
940 	}
941 }
942 
943 static int
944 vmbus_intr_setup(struct vmbus_softc *sc)
945 {
946 	int cpu;
947 
948 	CPU_FOREACH(cpu) {
949 		char buf[MAXCOMLEN + 1];
950 		cpuset_t cpu_mask;
951 
952 		/* Allocate an interrupt counter for Hyper-V interrupt */
953 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
954 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
955 
956 		/*
957 		 * Setup taskqueue to handle events.  Task will be per-
958 		 * channel.
959 		 */
960 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
961 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
962 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
963 		if (vmbus_pin_evttask) {
964 			CPU_SETOF(cpu, &cpu_mask);
965 			taskqueue_start_threads_cpuset(
966 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
967 			    &cpu_mask, "hvevent%d", cpu);
968 		} else {
969 			taskqueue_start_threads(
970 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
971 			    "hvevent%d", cpu);
972 		}
973 
974 		/*
975 		 * Setup tasks and taskqueues to handle messages.
976 		 */
977 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
978 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
979 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
980 		CPU_SETOF(cpu, &cpu_mask);
981 		taskqueue_start_threads_cpuset(
982 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
983 		    "hvmsg%d", cpu);
984 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
985 		    vmbus_msg_task, sc);
986 	}
987 
988 #if defined(__amd64__) && defined(KLD_MODULE)
989 	pmap_pti_add_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE, true);
990 #endif
991 
992 	/*
993 	 * All Hyper-V ISR required resources are setup, now let's find a
994 	 * free IDT vector for Hyper-V ISR and set it up.
995 	 */
996 	sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
997 	    IDTVEC(vmbus_isr));
998 	if (sc->vmbus_idtvec < 0) {
999 #if defined(__amd64__) && defined(KLD_MODULE)
1000 		pmap_pti_remove_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE);
1001 #endif
1002 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
1003 		return ENXIO;
1004 	}
1005 	if (bootverbose) {
1006 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
1007 		    sc->vmbus_idtvec);
1008 	}
1009 	return 0;
1010 }
1011 
1012 static void
1013 vmbus_intr_teardown(struct vmbus_softc *sc)
1014 {
1015 	int cpu;
1016 
1017 	if (sc->vmbus_idtvec >= 0) {
1018 		lapic_ipi_free(sc->vmbus_idtvec);
1019 		sc->vmbus_idtvec = -1;
1020 	}
1021 
1022 #if defined(__amd64__) && defined(KLD_MODULE)
1023 	pmap_pti_remove_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE);
1024 #endif
1025 
1026 	CPU_FOREACH(cpu) {
1027 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1028 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1029 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1030 		}
1031 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1032 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1033 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1034 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1035 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1036 		}
1037 	}
1038 }
1039 
1040 static int
1041 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1042 {
1043 	return (ENOENT);
1044 }
1045 
1046 static int
1047 vmbus_child_pnpinfo(device_t dev, device_t child, struct sbuf *sb)
1048 {
1049 	const struct vmbus_channel *chan;
1050 	char guidbuf[HYPERV_GUID_STRLEN];
1051 
1052 	chan = vmbus_get_channel(child);
1053 	if (chan == NULL) {
1054 		/* Event timer device, which does not belong to a channel */
1055 		return (0);
1056 	}
1057 
1058 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1059 	sbuf_printf(sb, "classid=%s", guidbuf);
1060 
1061 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1062 	sbuf_printf(sb, " deviceid=%s", guidbuf);
1063 
1064 	return (0);
1065 }
1066 
1067 int
1068 vmbus_add_child(struct vmbus_channel *chan)
1069 {
1070 	struct vmbus_softc *sc = chan->ch_vmbus;
1071 	device_t parent = sc->vmbus_dev;
1072 
1073 	bus_topo_lock();
1074 	chan->ch_dev = device_add_child(parent, NULL, -1);
1075 	if (chan->ch_dev == NULL) {
1076 		bus_topo_unlock();
1077 		device_printf(parent, "device_add_child for chan%u failed\n",
1078 		    chan->ch_id);
1079 		return (ENXIO);
1080 	}
1081 	device_set_ivars(chan->ch_dev, chan);
1082 	device_probe_and_attach(chan->ch_dev);
1083 	bus_topo_unlock();
1084 
1085 	return (0);
1086 }
1087 
1088 int
1089 vmbus_delete_child(struct vmbus_channel *chan)
1090 {
1091 	int error = 0;
1092 
1093 	bus_topo_lock();
1094 	if (chan->ch_dev != NULL) {
1095 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1096 		    chan->ch_dev);
1097 		chan->ch_dev = NULL;
1098 	}
1099 	bus_topo_unlock();
1100 	return (error);
1101 }
1102 
1103 static int
1104 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1105 {
1106 	struct vmbus_softc *sc = arg1;
1107 	char verstr[16];
1108 
1109 	snprintf(verstr, sizeof(verstr), "%u.%u",
1110 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1111 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1112 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1113 }
1114 
1115 /*
1116  * We need the function to make sure the MMIO resource is allocated from the
1117  * ranges found in _CRS.
1118  *
1119  * For the release function, we can use bus_generic_release_resource().
1120  */
1121 static struct resource *
1122 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1123     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1124 {
1125 	device_t parent = device_get_parent(dev);
1126 	struct resource *res;
1127 
1128 #ifdef NEW_PCIB
1129 	if (type == SYS_RES_MEMORY) {
1130 		struct vmbus_softc *sc = device_get_softc(dev);
1131 
1132 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1133 		    rid, start, end, count, flags);
1134 	} else
1135 #endif
1136 	{
1137 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1138 		    end, count, flags);
1139 	}
1140 
1141 	return (res);
1142 }
1143 
1144 static int
1145 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1146 {
1147 
1148 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1149 	    irqs));
1150 }
1151 
1152 static int
1153 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1154 {
1155 
1156 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1157 }
1158 
1159 static int
1160 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1161 {
1162 
1163 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1164 }
1165 
1166 static int
1167 vmbus_release_msix(device_t bus, device_t dev, int irq)
1168 {
1169 
1170 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1171 }
1172 
1173 static int
1174 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1175 	uint32_t *data)
1176 {
1177 
1178 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1179 }
1180 
1181 static uint32_t
1182 vmbus_get_version_method(device_t bus, device_t dev)
1183 {
1184 	struct vmbus_softc *sc = device_get_softc(bus);
1185 
1186 	return sc->vmbus_version;
1187 }
1188 
1189 static int
1190 vmbus_probe_guid_method(device_t bus, device_t dev,
1191     const struct hyperv_guid *guid)
1192 {
1193 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1194 
1195 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1196 		return 0;
1197 	return ENXIO;
1198 }
1199 
1200 static uint32_t
1201 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1202 {
1203 	const struct vmbus_softc *sc = device_get_softc(bus);
1204 
1205 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1206 }
1207 
1208 static struct taskqueue *
1209 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1210 {
1211 	const struct vmbus_softc *sc = device_get_softc(bus);
1212 
1213 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1214 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1215 }
1216 
1217 #ifdef NEW_PCIB
1218 #define VTPM_BASE_ADDR 0xfed40000
1219 #define FOUR_GB (1ULL << 32)
1220 
1221 enum parse_pass { parse_64, parse_32 };
1222 
1223 struct parse_context {
1224 	device_t vmbus_dev;
1225 	enum parse_pass pass;
1226 };
1227 
1228 static ACPI_STATUS
1229 parse_crs(ACPI_RESOURCE *res, void *ctx)
1230 {
1231 	const struct parse_context *pc = ctx;
1232 	device_t vmbus_dev = pc->vmbus_dev;
1233 
1234 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1235 	UINT64 start, end;
1236 
1237 	switch (res->Type) {
1238 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1239 		start = res->Data.Address32.Address.Minimum;
1240 		end = res->Data.Address32.Address.Maximum;
1241 		break;
1242 
1243 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1244 		start = res->Data.Address64.Address.Minimum;
1245 		end = res->Data.Address64.Address.Maximum;
1246 		break;
1247 
1248 	default:
1249 		/* Unused types. */
1250 		return (AE_OK);
1251 	}
1252 
1253 	/*
1254 	 * We don't use <1MB addresses.
1255 	 */
1256 	if (end < 0x100000)
1257 		return (AE_OK);
1258 
1259 	/* Don't conflict with vTPM. */
1260 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1261 		end = VTPM_BASE_ADDR - 1;
1262 
1263 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1264 	    (pc->pass == parse_64 && start >= FOUR_GB))
1265 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1266 		    start, end, 0);
1267 
1268 	return (AE_OK);
1269 }
1270 
1271 static void
1272 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1273 {
1274 	struct parse_context pc;
1275 	ACPI_STATUS status;
1276 
1277 	if (bootverbose)
1278 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1279 
1280 	pc.vmbus_dev = vmbus_dev;
1281 	pc.pass = pass;
1282 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1283 			parse_crs, &pc);
1284 
1285 	if (bootverbose && ACPI_FAILURE(status))
1286 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1287 }
1288 
1289 static void
1290 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1291 {
1292 	device_t acpi0, parent;
1293 
1294 	parent = device_get_parent(dev);
1295 
1296 	acpi0 = device_get_parent(parent);
1297 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1298 		device_t *children;
1299 		int count;
1300 
1301 		/*
1302 		 * Try to locate VMBUS resources and find _CRS on them.
1303 		 */
1304 		if (device_get_children(acpi0, &children, &count) == 0) {
1305 			int i;
1306 
1307 			for (i = 0; i < count; ++i) {
1308 				if (!device_is_attached(children[i]))
1309 					continue;
1310 
1311 				if (strcmp("vmbus_res",
1312 				    device_get_name(children[i])) == 0)
1313 					vmbus_get_crs(children[i], dev, pass);
1314 			}
1315 			free(children, M_TEMP);
1316 		}
1317 
1318 		/*
1319 		 * Try to find _CRS on acpi.
1320 		 */
1321 		vmbus_get_crs(acpi0, dev, pass);
1322 	} else {
1323 		device_printf(dev, "not grandchild of acpi\n");
1324 	}
1325 
1326 	/*
1327 	 * Try to find _CRS on parent.
1328 	 */
1329 	vmbus_get_crs(parent, dev, pass);
1330 }
1331 
1332 static void
1333 vmbus_get_mmio_res(device_t dev)
1334 {
1335 	struct vmbus_softc *sc = device_get_softc(dev);
1336 	/*
1337 	 * We walk the resources twice to make sure that: in the resource
1338 	 * list, the 32-bit resources appear behind the 64-bit resources.
1339 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1340 	 * iterate through the list to find a range for a 64-bit BAR in
1341 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1342 	 * ranges first.
1343 	 */
1344 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1345 
1346 	vmbus_get_mmio_res_pass(dev, parse_64);
1347 	vmbus_get_mmio_res_pass(dev, parse_32);
1348 }
1349 
1350 /*
1351  * On Gen2 VMs, Hyper-V provides mmio space for framebuffer.
1352  * This mmio address range is not useable for other PCI devices.
1353  * Currently only efifb and vbefb drivers are using this range without
1354  * reserving it from system.
1355  * Therefore, vmbus driver reserves it before any other PCI device
1356  * drivers start to request mmio addresses.
1357  */
1358 static struct resource *hv_fb_res;
1359 
1360 static void
1361 vmbus_fb_mmio_res(device_t dev)
1362 {
1363 	struct efi_fb *efifb;
1364 	struct vbe_fb *vbefb;
1365 	rman_res_t fb_start, fb_end, fb_count;
1366 	int fb_height, fb_width;
1367 	caddr_t kmdp;
1368 
1369 	struct vmbus_softc *sc = device_get_softc(dev);
1370 	int rid = 0;
1371 
1372 	kmdp = preload_search_by_type("elf kernel");
1373 	if (kmdp == NULL)
1374 		kmdp = preload_search_by_type("elf64 kernel");
1375 	efifb = (struct efi_fb *)preload_search_info(kmdp,
1376 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
1377 	vbefb = (struct vbe_fb *)preload_search_info(kmdp,
1378 	    MODINFO_METADATA | MODINFOMD_VBE_FB);
1379 	if (efifb != NULL) {
1380 		fb_start = efifb->fb_addr;
1381 		fb_end = efifb->fb_addr + efifb->fb_size;
1382 		fb_count = efifb->fb_size;
1383 		fb_height = efifb->fb_height;
1384 		fb_width = efifb->fb_width;
1385 	} else if (vbefb != NULL) {
1386 		fb_start = vbefb->fb_addr;
1387 		fb_end = vbefb->fb_addr + vbefb->fb_size;
1388 		fb_count = vbefb->fb_size;
1389 		fb_height = vbefb->fb_height;
1390 		fb_width = vbefb->fb_width;
1391 	} else {
1392 		if (bootverbose)
1393 			device_printf(dev,
1394 			    "no preloaded kernel fb information\n");
1395 		/* We are on Gen1 VM, just return. */
1396 		return;
1397 	}
1398 
1399 	if (bootverbose)
1400 		device_printf(dev,
1401 		    "fb: fb_addr: %#jx, size: %#jx, "
1402 		    "actual size needed: 0x%x\n",
1403 		    fb_start, fb_count, fb_height * fb_width);
1404 
1405 	hv_fb_res = pcib_host_res_alloc(&sc->vmbus_mmio_res, dev,
1406 	    SYS_RES_MEMORY, &rid, fb_start, fb_end, fb_count,
1407 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1408 
1409 	if (hv_fb_res && bootverbose)
1410 		device_printf(dev,
1411 		    "successfully reserved memory for framebuffer "
1412 		    "starting at %#jx, size %#jx\n",
1413 		    fb_start, fb_count);
1414 }
1415 
1416 static void
1417 vmbus_free_mmio_res(device_t dev)
1418 {
1419 	struct vmbus_softc *sc = device_get_softc(dev);
1420 
1421 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1422 
1423 	if (hv_fb_res)
1424 		hv_fb_res = NULL;
1425 }
1426 #endif	/* NEW_PCIB */
1427 
1428 static void
1429 vmbus_identify(driver_t *driver, device_t parent)
1430 {
1431 
1432 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1433 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1434 		return;
1435 	device_add_child(parent, "vmbus", -1);
1436 }
1437 
1438 static int
1439 vmbus_probe(device_t dev)
1440 {
1441 
1442 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1443 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1444 		return (ENXIO);
1445 
1446 	device_set_desc(dev, "Hyper-V Vmbus");
1447 	return (BUS_PROBE_DEFAULT);
1448 }
1449 
1450 /**
1451  * @brief Main vmbus driver initialization routine.
1452  *
1453  * Here, we
1454  * - initialize the vmbus driver context
1455  * - setup various driver entry points
1456  * - invoke the vmbus hv main init routine
1457  * - get the irq resource
1458  * - invoke the vmbus to add the vmbus root device
1459  * - setup the vmbus root device
1460  * - retrieve the channel offers
1461  */
1462 static int
1463 vmbus_doattach(struct vmbus_softc *sc)
1464 {
1465 	struct sysctl_oid_list *child;
1466 	struct sysctl_ctx_list *ctx;
1467 	int ret;
1468 
1469 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1470 		return (0);
1471 
1472 #ifdef NEW_PCIB
1473 	vmbus_get_mmio_res(sc->vmbus_dev);
1474 	vmbus_fb_mmio_res(sc->vmbus_dev);
1475 #endif
1476 
1477 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1478 
1479 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1480 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1481 	TAILQ_INIT(&sc->vmbus_prichans);
1482 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1483 	TAILQ_INIT(&sc->vmbus_chans);
1484 	sc->vmbus_chmap = malloc(
1485 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1486 	    M_WAITOK | M_ZERO);
1487 
1488 	/*
1489 	 * Create context for "post message" Hypercalls
1490 	 */
1491 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1492 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1493 	    sizeof(struct vmbus_msghc));
1494 	if (sc->vmbus_xc == NULL) {
1495 		ret = ENXIO;
1496 		goto cleanup;
1497 	}
1498 
1499 	/*
1500 	 * Allocate DMA stuffs.
1501 	 */
1502 	ret = vmbus_dma_alloc(sc);
1503 	if (ret != 0)
1504 		goto cleanup;
1505 
1506 	/*
1507 	 * Setup interrupt.
1508 	 */
1509 	ret = vmbus_intr_setup(sc);
1510 	if (ret != 0)
1511 		goto cleanup;
1512 
1513 	/*
1514 	 * Setup SynIC.
1515 	 */
1516 	if (bootverbose)
1517 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1518 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1519 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1520 
1521 	/*
1522 	 * Initialize vmbus, e.g. connect to Hypervisor.
1523 	 */
1524 	ret = vmbus_init(sc);
1525 	if (ret != 0)
1526 		goto cleanup;
1527 
1528 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1529 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1530 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1531 	else
1532 		sc->vmbus_event_proc = vmbus_event_proc;
1533 
1534 	ret = vmbus_scan(sc);
1535 	if (ret != 0)
1536 		goto cleanup;
1537 
1538 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1539 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1540 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1541 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1542 	    vmbus_sysctl_version, "A", "vmbus version");
1543 
1544 	return (ret);
1545 
1546 cleanup:
1547 	vmbus_scan_teardown(sc);
1548 	vmbus_intr_teardown(sc);
1549 	vmbus_dma_free(sc);
1550 	if (sc->vmbus_xc != NULL) {
1551 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1552 		sc->vmbus_xc = NULL;
1553 	}
1554 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1555 	mtx_destroy(&sc->vmbus_prichan_lock);
1556 	mtx_destroy(&sc->vmbus_chan_lock);
1557 
1558 	return (ret);
1559 }
1560 
1561 static void
1562 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1563 {
1564 }
1565 
1566 #ifdef EARLY_AP_STARTUP
1567 
1568 static void
1569 vmbus_intrhook(void *xsc)
1570 {
1571 	struct vmbus_softc *sc = xsc;
1572 
1573 	if (bootverbose)
1574 		device_printf(sc->vmbus_dev, "intrhook\n");
1575 	vmbus_doattach(sc);
1576 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1577 }
1578 
1579 #endif	/* EARLY_AP_STARTUP */
1580 
1581 static int
1582 vmbus_attach(device_t dev)
1583 {
1584 	vmbus_sc = device_get_softc(dev);
1585 	vmbus_sc->vmbus_dev = dev;
1586 	vmbus_sc->vmbus_idtvec = -1;
1587 
1588 	/*
1589 	 * Event processing logic will be configured:
1590 	 * - After the vmbus protocol version negotiation.
1591 	 * - Before we request channel offers.
1592 	 */
1593 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1594 
1595 #ifdef EARLY_AP_STARTUP
1596 	/*
1597 	 * Defer the real attach until the pause(9) works as expected.
1598 	 */
1599 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1600 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1601 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1602 #else	/* !EARLY_AP_STARTUP */
1603 	/*
1604 	 * If the system has already booted and thread
1605 	 * scheduling is possible indicated by the global
1606 	 * cold set to zero, we just call the driver
1607 	 * initialization directly.
1608 	 */
1609 	if (!cold)
1610 		vmbus_doattach(vmbus_sc);
1611 #endif	/* EARLY_AP_STARTUP */
1612 
1613 	return (0);
1614 }
1615 
1616 static int
1617 vmbus_detach(device_t dev)
1618 {
1619 	struct vmbus_softc *sc = device_get_softc(dev);
1620 
1621 	bus_generic_detach(dev);
1622 	vmbus_chan_destroy_all(sc);
1623 
1624 	vmbus_scan_teardown(sc);
1625 
1626 	vmbus_disconnect(sc);
1627 
1628 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1629 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1630 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1631 	}
1632 
1633 	vmbus_intr_teardown(sc);
1634 	vmbus_dma_free(sc);
1635 
1636 	if (sc->vmbus_xc != NULL) {
1637 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1638 		sc->vmbus_xc = NULL;
1639 	}
1640 
1641 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1642 	mtx_destroy(&sc->vmbus_prichan_lock);
1643 	mtx_destroy(&sc->vmbus_chan_lock);
1644 
1645 #ifdef NEW_PCIB
1646 	vmbus_free_mmio_res(dev);
1647 #endif
1648 
1649 	return (0);
1650 }
1651 
1652 #ifndef EARLY_AP_STARTUP
1653 
1654 static void
1655 vmbus_sysinit(void *arg __unused)
1656 {
1657 	struct vmbus_softc *sc = vmbus_get_softc();
1658 
1659 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1660 		return;
1661 
1662 	/*
1663 	 * If the system has already booted and thread
1664 	 * scheduling is possible, as indicated by the
1665 	 * global cold set to zero, we just call the driver
1666 	 * initialization directly.
1667 	 */
1668 	if (!cold)
1669 		vmbus_doattach(sc);
1670 }
1671 /*
1672  * NOTE:
1673  * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1674  * initialized.
1675  */
1676 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1677 
1678 #endif	/* !EARLY_AP_STARTUP */
1679