xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision eb9da1ada8b6b2c74378a5c17029ec5a7fb199e6)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
46 
47 #include <machine/intr_machdep.h>
48 #include <x86/include/apicvar.h>
49 
50 #include <contrib/dev/acpica/include/acpi.h>
51 
52 #include <dev/hyperv/include/hyperv.h>
53 #include <dev/hyperv/include/vmbus_xact.h>
54 #include <dev/hyperv/vmbus/hyperv_reg.h>
55 #include <dev/hyperv/vmbus/hyperv_var.h>
56 #include <dev/hyperv/vmbus/vmbus_reg.h>
57 #include <dev/hyperv/vmbus/vmbus_var.h>
58 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
59 
60 #include "acpi_if.h"
61 #include "vmbus_if.h"
62 
/*
 * Initial value of the softc's GPADL counter (sc->vmbus_gpadl, set in
 * vmbus_doattach()); vmbus_gpadl_alloc() hands out successive values
 * starting from here.
 */
#define VMBUS_GPADL_START		0xe1e10
64 
/*
 * Message Hypercall context.  It is obtained from the private area of a
 * vmbus_xact in vmbus_msghc_get().
 */
struct vmbus_msghc {
	/* Transaction backing this context (request buffer, response). */
	struct vmbus_xact		*mh_xact;
	/* Copy of the input parameter, restored before each retry. */
	struct hypercall_postmsg_in	mh_inprm_save;
};
69 
/*
 * Device, bus and vmbus interface methods (see vmbus_methods[]).
 */
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static int			vmbus_read_ivar(device_t, device_t, int,
				    uintptr_t *);
static int			vmbus_child_pnpinfo_str(device_t, device_t,
				    char *, size_t);
static uint32_t			vmbus_get_version_method(device_t, device_t);
static int			vmbus_probe_guid_method(device_t, device_t,
				    const struct hyperv_guid *);

/*
 * File-local helpers.
 */
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_connect(struct vmbus_softc *, uint32_t);
static int			vmbus_req_channels(struct vmbus_softc *sc);
static void			vmbus_disconnect(struct vmbus_softc *);
static int			vmbus_scan(struct vmbus_softc *);
static void			vmbus_scan_wait(struct vmbus_softc *);
static void			vmbus_scan_newchan(struct vmbus_softc *);
static void			vmbus_scan_newdev(struct vmbus_softc *);
static void			vmbus_scan_done(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_chanmsg_handle(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_msg_task(void *, int);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static int			vmbus_doattach(struct vmbus_softc *);
static void			vmbus_event_proc_dummy(struct vmbus_softc *,
				    int);
104 
/* The vmbus softc; recorded by vmbus_attach(), read via vmbus_get_softc(). */
static struct vmbus_softc	*vmbus_sc;

/* Interrupt entry point handed to lapic_ipi_alloc() in vmbus_intr_setup(). */
extern inthand_t IDTVEC(vmbus_isr);
108 
/*
 * Protocol versions offered to the host.  vmbus_init() tries them in
 * array order and keeps the first one for which vmbus_connect() succeeds.
 */
static const uint32_t		vmbus_version[] = {
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};
115 
/*
 * Per-message-type handlers, indexed by channel message type in
 * vmbus_chanmsg_handle(); a NULL entry means no handler in this file.
 * NOTE(review): the VMBUS_CHANMSG_PROC* macros are defined elsewhere and
 * presumably expand to designated initializers -- confirm.
 */
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
121 
/*
 * Method table of the vmbus driver.  Shutdown/suspend/resume and the
 * add_child/print_child bus methods use the generic bus implementations.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),

	/* Vmbus interface */
	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),

	DEVMETHOD_END
};
143 
/* Driver declaration: name, method table and softc size. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* Register the driver under the acpi bus and declare the acpi dependency. */
DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
155 
156 static __inline struct vmbus_softc *
157 vmbus_get_softc(void)
158 {
159 	return vmbus_sc;
160 }
161 
162 void
163 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
164 {
165 	struct hypercall_postmsg_in *inprm;
166 
167 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
168 		panic("invalid data size %zu", dsize);
169 
170 	inprm = vmbus_xact_req_data(mh->mh_xact);
171 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
172 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
173 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
174 	inprm->hc_dsize = dsize;
175 }
176 
177 struct vmbus_msghc *
178 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
179 {
180 	struct vmbus_msghc *mh;
181 	struct vmbus_xact *xact;
182 
183 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
184 		panic("invalid data size %zu", dsize);
185 
186 	xact = vmbus_xact_get(sc->vmbus_xc,
187 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
188 	if (xact == NULL)
189 		return (NULL);
190 
191 	mh = vmbus_xact_priv(xact, sizeof(*mh));
192 	mh->mh_xact = xact;
193 
194 	vmbus_msghc_reset(mh, dsize);
195 	return (mh);
196 }
197 
198 void
199 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
200 {
201 
202 	vmbus_xact_put(mh->mh_xact);
203 }
204 
205 void *
206 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
207 {
208 	struct hypercall_postmsg_in *inprm;
209 
210 	inprm = vmbus_xact_req_data(mh->mh_xact);
211 	return (inprm->hc_data);
212 }
213 
214 int
215 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
216 {
217 	sbintime_t time = SBT_1MS;
218 	struct hypercall_postmsg_in *inprm;
219 	bus_addr_t inprm_paddr;
220 	int i;
221 
222 	inprm = vmbus_xact_req_data(mh->mh_xact);
223 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
224 
225 	/*
226 	 * Save the input parameter so that we could restore the input
227 	 * parameter if the Hypercall failed.
228 	 *
229 	 * XXX
230 	 * Is this really necessary?!  i.e. Will the Hypercall ever
231 	 * overwrite the input parameter?
232 	 */
233 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
234 
235 	/*
236 	 * In order to cope with transient failures, e.g. insufficient
237 	 * resources on host side, we retry the post message Hypercall
238 	 * several times.  20 retries seem sufficient.
239 	 */
240 #define HC_RETRY_MAX	20
241 
242 	for (i = 0; i < HC_RETRY_MAX; ++i) {
243 		uint64_t status;
244 
245 		status = hypercall_post_message(inprm_paddr);
246 		if (status == HYPERCALL_STATUS_SUCCESS)
247 			return 0;
248 
249 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
250 		if (time < SBT_1S * 2)
251 			time *= 2;
252 
253 		/* Restore input parameter and try again */
254 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
255 	}
256 
257 #undef HC_RETRY_MAX
258 
259 	return EIO;
260 }
261 
262 int
263 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
264 {
265 	int error;
266 
267 	vmbus_xact_activate(mh->mh_xact);
268 	error = vmbus_msghc_exec_noresult(mh);
269 	if (error)
270 		vmbus_xact_deactivate(mh->mh_xact);
271 	return error;
272 }
273 
274 const struct vmbus_message *
275 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
276 {
277 	size_t resp_len;
278 
279 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
280 }
281 
282 void
283 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
284 {
285 
286 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
287 }
288 
289 uint32_t
290 vmbus_gpadl_alloc(struct vmbus_softc *sc)
291 {
292 	return atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
293 }
294 
295 static int
296 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
297 {
298 	struct vmbus_chanmsg_connect *req;
299 	const struct vmbus_message *msg;
300 	struct vmbus_msghc *mh;
301 	int error, done = 0;
302 
303 	mh = vmbus_msghc_get(sc, sizeof(*req));
304 	if (mh == NULL)
305 		return ENXIO;
306 
307 	req = vmbus_msghc_dataptr(mh);
308 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
309 	req->chm_ver = version;
310 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
311 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
312 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
313 
314 	error = vmbus_msghc_exec(sc, mh);
315 	if (error) {
316 		vmbus_msghc_put(sc, mh);
317 		return error;
318 	}
319 
320 	msg = vmbus_msghc_wait_result(sc, mh);
321 	done = ((const struct vmbus_chanmsg_connect_resp *)
322 	    msg->msg_data)->chm_done;
323 
324 	vmbus_msghc_put(sc, mh);
325 
326 	return (done ? 0 : EOPNOTSUPP);
327 }
328 
329 static int
330 vmbus_init(struct vmbus_softc *sc)
331 {
332 	int i;
333 
334 	for (i = 0; i < nitems(vmbus_version); ++i) {
335 		int error;
336 
337 		error = vmbus_connect(sc, vmbus_version[i]);
338 		if (!error) {
339 			sc->vmbus_version = vmbus_version[i];
340 			device_printf(sc->vmbus_dev, "version %u.%u\n",
341 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
342 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
343 			return 0;
344 		}
345 	}
346 	return ENXIO;
347 }
348 
349 static void
350 vmbus_disconnect(struct vmbus_softc *sc)
351 {
352 	struct vmbus_chanmsg_disconnect *req;
353 	struct vmbus_msghc *mh;
354 	int error;
355 
356 	mh = vmbus_msghc_get(sc, sizeof(*req));
357 	if (mh == NULL) {
358 		device_printf(sc->vmbus_dev,
359 		    "can not get msg hypercall for disconnect\n");
360 		return;
361 	}
362 
363 	req = vmbus_msghc_dataptr(mh);
364 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
365 
366 	error = vmbus_msghc_exec_noresult(mh);
367 	vmbus_msghc_put(sc, mh);
368 
369 	if (error) {
370 		device_printf(sc->vmbus_dev,
371 		    "disconnect msg hypercall failed\n");
372 	}
373 }
374 
375 static int
376 vmbus_req_channels(struct vmbus_softc *sc)
377 {
378 	struct vmbus_chanmsg_chrequest *req;
379 	struct vmbus_msghc *mh;
380 	int error;
381 
382 	mh = vmbus_msghc_get(sc, sizeof(*req));
383 	if (mh == NULL)
384 		return ENXIO;
385 
386 	req = vmbus_msghc_dataptr(mh);
387 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
388 
389 	error = vmbus_msghc_exec_noresult(mh);
390 	vmbus_msghc_put(sc, mh);
391 
392 	return error;
393 }
394 
395 static void
396 vmbus_scan_newchan(struct vmbus_softc *sc)
397 {
398 	mtx_lock(&sc->vmbus_scan_lock);
399 	if ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0)
400 		sc->vmbus_scan_chcnt++;
401 	mtx_unlock(&sc->vmbus_scan_lock);
402 }
403 
404 static void
405 vmbus_scan_done(struct vmbus_softc *sc,
406     const struct vmbus_message *msg __unused)
407 {
408 	mtx_lock(&sc->vmbus_scan_lock);
409 	sc->vmbus_scan_chcnt |= VMBUS_SCAN_CHCNT_DONE;
410 	mtx_unlock(&sc->vmbus_scan_lock);
411 	wakeup(&sc->vmbus_scan_chcnt);
412 }
413 
414 static void
415 vmbus_scan_newdev(struct vmbus_softc *sc)
416 {
417 	mtx_lock(&sc->vmbus_scan_lock);
418 	sc->vmbus_scan_devcnt++;
419 	mtx_unlock(&sc->vmbus_scan_lock);
420 	wakeup(&sc->vmbus_scan_devcnt);
421 }
422 
423 static void
424 vmbus_scan_wait(struct vmbus_softc *sc)
425 {
426 	uint32_t chancnt;
427 
428 	mtx_lock(&sc->vmbus_scan_lock);
429 	while ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) {
430 		mtx_sleep(&sc->vmbus_scan_chcnt, &sc->vmbus_scan_lock, 0,
431 		    "waitch", 0);
432 	}
433 	chancnt = sc->vmbus_scan_chcnt & ~VMBUS_SCAN_CHCNT_DONE;
434 
435 	while (sc->vmbus_scan_devcnt != chancnt) {
436 		mtx_sleep(&sc->vmbus_scan_devcnt, &sc->vmbus_scan_lock, 0,
437 		    "waitdev", 0);
438 	}
439 	mtx_unlock(&sc->vmbus_scan_lock);
440 }
441 
442 static int
443 vmbus_scan(struct vmbus_softc *sc)
444 {
445 	int error;
446 
447 	/*
448 	 * Start vmbus scanning.
449 	 */
450 	error = vmbus_req_channels(sc);
451 	if (error) {
452 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
453 		    error);
454 		return error;
455 	}
456 
457 	/*
458 	 * Wait for all devices are added to vmbus.
459 	 */
460 	vmbus_scan_wait(sc);
461 
462 	/*
463 	 * Identify, probe and attach.
464 	 */
465 	bus_generic_probe(sc->vmbus_dev);
466 	bus_generic_attach(sc->vmbus_dev);
467 
468 	if (bootverbose) {
469 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
470 		    "done\n");
471 	}
472 	return 0;
473 }
474 
475 static void
476 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
477 {
478 	vmbus_chanmsg_proc_t msg_proc;
479 	uint32_t msg_type;
480 
481 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
482 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
483 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
484 		    msg_type);
485 		return;
486 	}
487 
488 	msg_proc = vmbus_chanmsg_handlers[msg_type];
489 	if (msg_proc != NULL)
490 		msg_proc(sc, msg);
491 
492 	/* Channel specific processing */
493 	vmbus_chan_msgproc(sc, msg);
494 }
495 
/*
 * Taskqueue handler draining the current CPU's VMBUS_SINT_MESSAGE slot.
 *
 * 'xsc' is the vmbus softc; 'pending' is unused.  The loop runs until the
 * slot reads HYPERV_MSGTYPE_NONE.  Channel messages are passed to
 * vmbus_chanmsg_handle(); messages of any other type are discarded.
 * After each message the slot is marked free and, if the message had the
 * pending flag set, an EOM is written.
 */
static void
vmbus_msg_task(void *xsc, int pending __unused)
{
	struct vmbus_softc *sc = xsc;
	volatile struct vmbus_message *msg;

	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
	for (;;) {
		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
			/* No message */
			break;
		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
			/* Channel message */
			vmbus_chanmsg_handle(sc,
			    __DEVOLATILE(const struct vmbus_message *, msg));
		}

		/* Release the slot. */
		msg->msg_type = HYPERV_MSGTYPE_NONE;
		/*
		 * Make sure the write to msg_type (i.e. set to
		 * HYPERV_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing. Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become compiler fence on UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause message queue rescan to possibly
			 * deliver another msg from the hypervisor
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}
}
535 
/*
 * Body of the vmbus interrupt handler for CPU 'cpu'.
 *
 * In order: handles an expired event timer found in the VMBUS_SINT_TIMER
 * message slot (calling vmbus_et_intr() with 'frame'), runs the event
 * processing callback sc->vmbus_event_proc, and finally, if the
 * VMBUS_SINT_MESSAGE slot is not empty, enqueues the per-cpu message task.
 *
 * Always returns FILTER_HANDLED.
 */
static __inline int
vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
{
	volatile struct vmbus_message *msg;
	struct vmbus_message *msg_base;

	msg_base = VMBUS_PCPU_GET(sc, message, cpu);

	/*
	 * Check event timer.
	 *
	 * TODO: move this to independent IDT vector.
	 */
	msg = msg_base + VMBUS_SINT_TIMER;
	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
		/* Release the slot before running the timer handler. */
		msg->msg_type = HYPERV_MSGTYPE_NONE;

		vmbus_et_intr(frame);

		/*
		 * Make sure the write to msg_type (i.e. set to
		 * HYPERV_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing. Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become compiler fence on UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause message queue rescan to possibly
			 * deliver another msg from the hypervisor
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}

	/*
	 * Check events.  Hot path for network and storage I/O data; high rate.
	 *
	 * NOTE:
	 * As recommended by the Windows guest fellows, we check events before
	 * checking messages.
	 */
	sc->vmbus_event_proc(sc, cpu);

	/*
	 * Check messages.  Mainly management stuffs; ultra low rate.
	 * The message itself is consumed later by vmbus_msg_task().
	 */
	msg = msg_base + VMBUS_SINT_MESSAGE;
	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
		    VMBUS_PCPU_PTR(sc, message_task, cpu));
	}

	return (FILTER_HANDLED);
}
596 
597 void
598 vmbus_handle_intr(struct trapframe *trap_frame)
599 {
600 	struct vmbus_softc *sc = vmbus_get_softc();
601 	int cpu = curcpu;
602 
603 	/*
604 	 * Disable preemption.
605 	 */
606 	critical_enter();
607 
608 	/*
609 	 * Do a little interrupt counting.
610 	 */
611 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
612 
613 	vmbus_handle_intr1(sc, trap_frame, cpu);
614 
615 	/*
616 	 * Enable preemption.
617 	 */
618 	critical_exit();
619 }
620 
/*
 * Program the SynIC of the current CPU; 'xsc' is the vmbus softc.
 * vmbus_doattach() runs this through smp_rendezvous().
 *
 * Records the CPU's virtual processor id, points SIMP and SIEFP at the
 * per-cpu message and event flags pages, configures the message and
 * timer SINTs with the vmbus IDT vector, and finally enables the SynIC.
 * Bits covered by the *_RSVD_MASK masks are carried over from the value
 * read back from each MSR.
 */
static void
vmbus_synic_setup(void *xsc)
{
	struct vmbus_softc *sc = xsc;
	int cpu = curcpu;
	uint64_t val, orig;
	uint32_t sint;

	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
		/* Save virtual processor id. */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
	} else {
		/* Set virtual processor id to 0 for compatibility. */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
	}

	/*
	 * Setup the SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
	     MSR_HV_SIMP_PGSHIFT);
	wrmsr(MSR_HV_SIMP, val);

	/*
	 * Setup the SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
	wrmsr(MSR_HV_SIEFP, val);


	/*
	 * Configure and unmask SINT for message and event flags.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * Configure and unmask SINT for timer.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * All done; enable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
	wrmsr(MSR_HV_SCONTROL, val);
}
681 
/*
 * Undo vmbus_synic_setup() on the current CPU; 'arg' is unused.
 * vmbus_detach() runs this through smp_rendezvous().
 *
 * Disables the SynIC, masks the message and timer SINTs, and then
 * rewrites SIMP and SIEFP keeping only the bits covered by their
 * *_RSVD_MASK masks.
 */
static void
vmbus_synic_teardown(void *arg)
{
	uint64_t orig;
	uint32_t sint;

	/*
	 * Disable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));

	/*
	 * Mask message and event flags SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Mask timer SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Teardown SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));

	/*
	 * Teardown SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
}
720 
721 static int
722 vmbus_dma_alloc(struct vmbus_softc *sc)
723 {
724 	bus_dma_tag_t parent_dtag;
725 	uint8_t *evtflags;
726 	int cpu;
727 
728 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
729 	CPU_FOREACH(cpu) {
730 		void *ptr;
731 
732 		/*
733 		 * Per-cpu messages and event flags.
734 		 */
735 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
736 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
737 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
738 		if (ptr == NULL)
739 			return ENOMEM;
740 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
741 
742 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
743 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
744 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
745 		if (ptr == NULL)
746 			return ENOMEM;
747 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
748 	}
749 
750 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
751 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
752 	if (evtflags == NULL)
753 		return ENOMEM;
754 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
755 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
756 	sc->vmbus_evtflags = evtflags;
757 
758 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
759 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
760 	if (sc->vmbus_mnf1 == NULL)
761 		return ENOMEM;
762 
763 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
764 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
765 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
766 	if (sc->vmbus_mnf2 == NULL)
767 		return ENOMEM;
768 
769 	return 0;
770 }
771 
772 static void
773 vmbus_dma_free(struct vmbus_softc *sc)
774 {
775 	int cpu;
776 
777 	if (sc->vmbus_evtflags != NULL) {
778 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
779 		sc->vmbus_evtflags = NULL;
780 		sc->vmbus_rx_evtflags = NULL;
781 		sc->vmbus_tx_evtflags = NULL;
782 	}
783 	if (sc->vmbus_mnf1 != NULL) {
784 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
785 		sc->vmbus_mnf1 = NULL;
786 	}
787 	if (sc->vmbus_mnf2 != NULL) {
788 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
789 		sc->vmbus_mnf2 = NULL;
790 	}
791 
792 	CPU_FOREACH(cpu) {
793 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
794 			hyperv_dmamem_free(
795 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
796 			    VMBUS_PCPU_GET(sc, message, cpu));
797 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
798 		}
799 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
800 			hyperv_dmamem_free(
801 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
802 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
803 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
804 		}
805 	}
806 }
807 
808 static int
809 vmbus_intr_setup(struct vmbus_softc *sc)
810 {
811 	int cpu;
812 
813 	CPU_FOREACH(cpu) {
814 		char buf[MAXCOMLEN + 1];
815 		cpuset_t cpu_mask;
816 
817 		/* Allocate an interrupt counter for Hyper-V interrupt */
818 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
819 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
820 
821 		/*
822 		 * Setup taskqueue to handle events.  Task will be per-
823 		 * channel.
824 		 */
825 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
826 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
827 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
828 		CPU_SETOF(cpu, &cpu_mask);
829 		taskqueue_start_threads_cpuset(
830 		    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask,
831 		    "hvevent%d", cpu);
832 
833 		/*
834 		 * Setup tasks and taskqueues to handle messages.
835 		 */
836 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
837 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
838 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
839 		CPU_SETOF(cpu, &cpu_mask);
840 		taskqueue_start_threads_cpuset(
841 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
842 		    "hvmsg%d", cpu);
843 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
844 		    vmbus_msg_task, sc);
845 	}
846 
847 	/*
848 	 * All Hyper-V ISR required resources are setup, now let's find a
849 	 * free IDT vector for Hyper-V ISR and set it up.
850 	 */
851 	sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
852 	if (sc->vmbus_idtvec < 0) {
853 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
854 		return ENXIO;
855 	}
856 	if(bootverbose) {
857 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
858 		    sc->vmbus_idtvec);
859 	}
860 	return 0;
861 }
862 
863 static void
864 vmbus_intr_teardown(struct vmbus_softc *sc)
865 {
866 	int cpu;
867 
868 	if (sc->vmbus_idtvec >= 0) {
869 		lapic_ipi_free(sc->vmbus_idtvec);
870 		sc->vmbus_idtvec = -1;
871 	}
872 
873 	CPU_FOREACH(cpu) {
874 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
875 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
876 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
877 		}
878 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
879 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
880 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
881 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
882 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
883 		}
884 	}
885 }
886 
887 static int
888 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
889 {
890 	return (ENOENT);
891 }
892 
893 static int
894 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
895 {
896 	const struct vmbus_channel *chan;
897 	char guidbuf[HYPERV_GUID_STRLEN];
898 
899 	chan = vmbus_get_channel(child);
900 	if (chan == NULL) {
901 		/* Event timer device, which does not belong to a channel */
902 		return (0);
903 	}
904 
905 	strlcat(buf, "classid=", buflen);
906 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
907 	strlcat(buf, guidbuf, buflen);
908 
909 	strlcat(buf, " deviceid=", buflen);
910 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
911 	strlcat(buf, guidbuf, buflen);
912 
913 	return (0);
914 }
915 
916 int
917 vmbus_add_child(struct vmbus_channel *chan)
918 {
919 	struct vmbus_softc *sc = chan->ch_vmbus;
920 	device_t parent = sc->vmbus_dev;
921 	int error = 0;
922 
923 	/* New channel has been offered */
924 	vmbus_scan_newchan(sc);
925 
926 	chan->ch_dev = device_add_child(parent, NULL, -1);
927 	if (chan->ch_dev == NULL) {
928 		device_printf(parent, "device_add_child for chan%u failed\n",
929 		    chan->ch_id);
930 		error = ENXIO;
931 		goto done;
932 	}
933 	device_set_ivars(chan->ch_dev, chan);
934 
935 done:
936 	/* New device has been/should be added to vmbus. */
937 	vmbus_scan_newdev(sc);
938 	return error;
939 }
940 
941 int
942 vmbus_delete_child(struct vmbus_channel *chan)
943 {
944 	int error;
945 
946 	if (chan->ch_dev == NULL) {
947 		/* Failed to add a device. */
948 		return 0;
949 	}
950 
951 	/*
952 	 * XXXKYS: Ensure that this is the opposite of
953 	 * device_add_child()
954 	 */
955 	mtx_lock(&Giant);
956 	error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev);
957 	mtx_unlock(&Giant);
958 
959 	return error;
960 }
961 
962 static int
963 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
964 {
965 	struct vmbus_softc *sc = arg1;
966 	char verstr[16];
967 
968 	snprintf(verstr, sizeof(verstr), "%u.%u",
969 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
970 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
971 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
972 }
973 
974 static uint32_t
975 vmbus_get_version_method(device_t bus, device_t dev)
976 {
977 	struct vmbus_softc *sc = device_get_softc(bus);
978 
979 	return sc->vmbus_version;
980 }
981 
982 static int
983 vmbus_probe_guid_method(device_t bus, device_t dev,
984     const struct hyperv_guid *guid)
985 {
986 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
987 
988 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
989 		return 0;
990 	return ENXIO;
991 }
992 
993 static int
994 vmbus_probe(device_t dev)
995 {
996 	char *id[] = { "VMBUS", NULL };
997 
998 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
999 	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1000 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1001 		return (ENXIO);
1002 
1003 	device_set_desc(dev, "Hyper-V Vmbus");
1004 
1005 	return (BUS_PROBE_DEFAULT);
1006 }
1007 
1008 /**
1009  * @brief Main vmbus driver initialization routine.
1010  *
1011  * Here, we
1012  * - initialize the vmbus driver context
1013  * - setup various driver entry points
1014  * - invoke the vmbus hv main init routine
1015  * - get the irq resource
1016  * - invoke the vmbus to add the vmbus root device
1017  * - setup the vmbus root device
1018  * - retrieve the channel offers
1019  */
1020 static int
1021 vmbus_doattach(struct vmbus_softc *sc)
1022 {
1023 	struct sysctl_oid_list *child;
1024 	struct sysctl_ctx_list *ctx;
1025 	int ret;
1026 
1027 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1028 		return (0);
1029 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1030 
1031 	mtx_init(&sc->vmbus_scan_lock, "vmbus scan", NULL, MTX_DEF);
1032 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1033 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1034 	TAILQ_INIT(&sc->vmbus_prichans);
1035 	sc->vmbus_chmap = malloc(
1036 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1037 	    M_WAITOK | M_ZERO);
1038 
1039 	/*
1040 	 * Create context for "post message" Hypercalls
1041 	 */
1042 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1043 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1044 	    sizeof(struct vmbus_msghc));
1045 	if (sc->vmbus_xc == NULL) {
1046 		ret = ENXIO;
1047 		goto cleanup;
1048 	}
1049 
1050 	/*
1051 	 * Allocate DMA stuffs.
1052 	 */
1053 	ret = vmbus_dma_alloc(sc);
1054 	if (ret != 0)
1055 		goto cleanup;
1056 
1057 	/*
1058 	 * Setup interrupt.
1059 	 */
1060 	ret = vmbus_intr_setup(sc);
1061 	if (ret != 0)
1062 		goto cleanup;
1063 
1064 	/*
1065 	 * Setup SynIC.
1066 	 */
1067 	if (bootverbose)
1068 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1069 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1070 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1071 
1072 	/*
1073 	 * Initialize vmbus, e.g. connect to Hypervisor.
1074 	 */
1075 	ret = vmbus_init(sc);
1076 	if (ret != 0)
1077 		goto cleanup;
1078 
1079 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1080 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1081 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1082 	else
1083 		sc->vmbus_event_proc = vmbus_event_proc;
1084 
1085 	ret = vmbus_scan(sc);
1086 	if (ret != 0)
1087 		goto cleanup;
1088 
1089 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1090 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1091 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1092 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1093 	    vmbus_sysctl_version, "A", "vmbus version");
1094 
1095 	return (ret);
1096 
1097 cleanup:
1098 	vmbus_intr_teardown(sc);
1099 	vmbus_dma_free(sc);
1100 	if (sc->vmbus_xc != NULL) {
1101 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1102 		sc->vmbus_xc = NULL;
1103 	}
1104 	free(sc->vmbus_chmap, M_DEVBUF);
1105 	mtx_destroy(&sc->vmbus_scan_lock);
1106 	mtx_destroy(&sc->vmbus_prichan_lock);
1107 
1108 	return (ret);
1109 }
1110 
1111 static void
1112 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1113 {
1114 }
1115 
1116 static int
1117 vmbus_attach(device_t dev)
1118 {
1119 	vmbus_sc = device_get_softc(dev);
1120 	vmbus_sc->vmbus_dev = dev;
1121 	vmbus_sc->vmbus_idtvec = -1;
1122 
1123 	/*
1124 	 * Event processing logic will be configured:
1125 	 * - After the vmbus protocol version negotiation.
1126 	 * - Before we request channel offers.
1127 	 */
1128 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1129 
1130 #ifndef EARLY_AP_STARTUP
1131 	/*
1132 	 * If the system has already booted and thread
1133 	 * scheduling is possible indicated by the global
1134 	 * cold set to zero, we just call the driver
1135 	 * initialization directly.
1136 	 */
1137 	if (!cold)
1138 #endif
1139 		vmbus_doattach(vmbus_sc);
1140 
1141 	return (0);
1142 }
1143 
1144 static int
1145 vmbus_detach(device_t dev)
1146 {
1147 	struct vmbus_softc *sc = device_get_softc(dev);
1148 
1149 	vmbus_chan_destroy_all(sc);
1150 
1151 	vmbus_disconnect(sc);
1152 
1153 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1154 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1155 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1156 	}
1157 
1158 	vmbus_intr_teardown(sc);
1159 	vmbus_dma_free(sc);
1160 
1161 	if (sc->vmbus_xc != NULL) {
1162 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1163 		sc->vmbus_xc = NULL;
1164 	}
1165 
1166 	free(sc->vmbus_chmap, M_DEVBUF);
1167 	mtx_destroy(&sc->vmbus_scan_lock);
1168 	mtx_destroy(&sc->vmbus_prichan_lock);
1169 
1170 	return (0);
1171 }
1172 
1173 #ifndef EARLY_AP_STARTUP
1174 
1175 static void
1176 vmbus_sysinit(void *arg __unused)
1177 {
1178 	struct vmbus_softc *sc = vmbus_get_softc();
1179 
1180 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1181 		return;
1182 
1183 	/*
1184 	 * If the system has already booted and thread
1185 	 * scheduling is possible, as indicated by the
1186 	 * global cold set to zero, we just call the driver
1187 	 * initialization directly.
1188 	 */
1189 	if (!cold)
1190 		vmbus_doattach(sc);
1191 }
/*
 * NOTE:
 * vmbus_sysinit() has to run as the last step of SI_SUB_SMP
 * (SI_ORDER_ANY), i.e. after SMP is initialized.
 */
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1198 
1199 #endif	/* !EARLY_AP_STARTUP */
1200