xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision d01498defbe804f66435b44f22da9278acddf082)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
46 
47 #include <machine/intr_machdep.h>
48 #include <x86/include/apicvar.h>
49 
50 #include <contrib/dev/acpica/include/acpi.h>
51 
52 #include <dev/hyperv/include/hyperv.h>
53 #include <dev/hyperv/include/vmbus_xact.h>
54 #include <dev/hyperv/vmbus/hyperv_reg.h>
55 #include <dev/hyperv/vmbus/hyperv_var.h>
56 #include <dev/hyperv/vmbus/vmbus_reg.h>
57 #include <dev/hyperv/vmbus/vmbus_var.h>
58 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
59 
60 #include "acpi_if.h"
61 #include "vmbus_if.h"
62 
/* Initial value of the softc GPADL counter used by vmbus_gpadl_alloc(). */
#define VMBUS_GPADL_START		0xe1e10
64 
65 struct vmbus_msghc {
66 	struct vmbus_xact		*mh_xact;
67 	struct hypercall_postmsg_in	mh_inprm_save;
68 };
69 
70 static int			vmbus_probe(device_t);
71 static int			vmbus_attach(device_t);
72 static int			vmbus_detach(device_t);
73 static int			vmbus_read_ivar(device_t, device_t, int,
74 				    uintptr_t *);
75 static int			vmbus_child_pnpinfo_str(device_t, device_t,
76 				    char *, size_t);
77 static uint32_t			vmbus_get_version_method(device_t, device_t);
78 static int			vmbus_probe_guid_method(device_t, device_t,
79 				    const struct hyperv_guid *);
80 
81 static int			vmbus_init(struct vmbus_softc *);
82 static int			vmbus_connect(struct vmbus_softc *, uint32_t);
83 static int			vmbus_req_channels(struct vmbus_softc *sc);
84 static void			vmbus_disconnect(struct vmbus_softc *);
85 static int			vmbus_scan(struct vmbus_softc *);
86 static void			vmbus_scan_teardown(struct vmbus_softc *);
87 static void			vmbus_scan_done(struct vmbus_softc *,
88 				    const struct vmbus_message *);
89 static void			vmbus_chanmsg_handle(struct vmbus_softc *,
90 				    const struct vmbus_message *);
91 static void			vmbus_msg_task(void *, int);
92 static void			vmbus_synic_setup(void *);
93 static void			vmbus_synic_teardown(void *);
94 static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
95 static int			vmbus_dma_alloc(struct vmbus_softc *);
96 static void			vmbus_dma_free(struct vmbus_softc *);
97 static int			vmbus_intr_setup(struct vmbus_softc *);
98 static void			vmbus_intr_teardown(struct vmbus_softc *);
99 static int			vmbus_doattach(struct vmbus_softc *);
100 static void			vmbus_event_proc_dummy(struct vmbus_softc *,
101 				    int);
102 
103 static struct vmbus_softc	*vmbus_sc;
104 
105 extern inthand_t IDTVEC(vmbus_isr);
106 
107 static const uint32_t		vmbus_version[] = {
108 	VMBUS_VERSION_WIN8_1,
109 	VMBUS_VERSION_WIN8,
110 	VMBUS_VERSION_WIN7,
111 	VMBUS_VERSION_WS2008
112 };
113 
114 static const vmbus_chanmsg_proc_t
115 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
116 	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
117 	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
118 };
119 
120 static device_method_t vmbus_methods[] = {
121 	/* Device interface */
122 	DEVMETHOD(device_probe,			vmbus_probe),
123 	DEVMETHOD(device_attach,		vmbus_attach),
124 	DEVMETHOD(device_detach,		vmbus_detach),
125 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
126 	DEVMETHOD(device_suspend,		bus_generic_suspend),
127 	DEVMETHOD(device_resume,		bus_generic_resume),
128 
129 	/* Bus interface */
130 	DEVMETHOD(bus_add_child,		bus_generic_add_child),
131 	DEVMETHOD(bus_print_child,		bus_generic_print_child),
132 	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
133 	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
134 
135 	/* Vmbus interface */
136 	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
137 	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
138 
139 	DEVMETHOD_END
140 };
141 
142 static driver_t vmbus_driver = {
143 	"vmbus",
144 	vmbus_methods,
145 	sizeof(struct vmbus_softc)
146 };
147 
148 static devclass_t vmbus_devclass;
149 
150 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
151 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
152 MODULE_VERSION(vmbus, 1);
153 
154 static __inline struct vmbus_softc *
155 vmbus_get_softc(void)
156 {
157 	return vmbus_sc;
158 }
159 
160 void
161 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
162 {
163 	struct hypercall_postmsg_in *inprm;
164 
165 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
166 		panic("invalid data size %zu", dsize);
167 
168 	inprm = vmbus_xact_req_data(mh->mh_xact);
169 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
170 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
171 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
172 	inprm->hc_dsize = dsize;
173 }
174 
175 struct vmbus_msghc *
176 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
177 {
178 	struct vmbus_msghc *mh;
179 	struct vmbus_xact *xact;
180 
181 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
182 		panic("invalid data size %zu", dsize);
183 
184 	xact = vmbus_xact_get(sc->vmbus_xc,
185 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
186 	if (xact == NULL)
187 		return (NULL);
188 
189 	mh = vmbus_xact_priv(xact, sizeof(*mh));
190 	mh->mh_xact = xact;
191 
192 	vmbus_msghc_reset(mh, dsize);
193 	return (mh);
194 }
195 
196 void
197 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
198 {
199 
200 	vmbus_xact_put(mh->mh_xact);
201 }
202 
203 void *
204 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
205 {
206 	struct hypercall_postmsg_in *inprm;
207 
208 	inprm = vmbus_xact_req_data(mh->mh_xact);
209 	return (inprm->hc_data);
210 }
211 
212 int
213 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
214 {
215 	sbintime_t time = SBT_1MS;
216 	struct hypercall_postmsg_in *inprm;
217 	bus_addr_t inprm_paddr;
218 	int i;
219 
220 	inprm = vmbus_xact_req_data(mh->mh_xact);
221 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
222 
223 	/*
224 	 * Save the input parameter so that we could restore the input
225 	 * parameter if the Hypercall failed.
226 	 *
227 	 * XXX
228 	 * Is this really necessary?!  i.e. Will the Hypercall ever
229 	 * overwrite the input parameter?
230 	 */
231 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
232 
233 	/*
234 	 * In order to cope with transient failures, e.g. insufficient
235 	 * resources on host side, we retry the post message Hypercall
236 	 * several times.  20 retries seem sufficient.
237 	 */
238 #define HC_RETRY_MAX	20
239 
240 	for (i = 0; i < HC_RETRY_MAX; ++i) {
241 		uint64_t status;
242 
243 		status = hypercall_post_message(inprm_paddr);
244 		if (status == HYPERCALL_STATUS_SUCCESS)
245 			return 0;
246 
247 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
248 		if (time < SBT_1S * 2)
249 			time *= 2;
250 
251 		/* Restore input parameter and try again */
252 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
253 	}
254 
255 #undef HC_RETRY_MAX
256 
257 	return EIO;
258 }
259 
260 int
261 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
262 {
263 	int error;
264 
265 	vmbus_xact_activate(mh->mh_xact);
266 	error = vmbus_msghc_exec_noresult(mh);
267 	if (error)
268 		vmbus_xact_deactivate(mh->mh_xact);
269 	return error;
270 }
271 
272 const struct vmbus_message *
273 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
274 {
275 	size_t resp_len;
276 
277 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
278 }
279 
280 void
281 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
282 {
283 
284 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
285 }
286 
287 uint32_t
288 vmbus_gpadl_alloc(struct vmbus_softc *sc)
289 {
290 	return atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
291 }
292 
293 static int
294 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
295 {
296 	struct vmbus_chanmsg_connect *req;
297 	const struct vmbus_message *msg;
298 	struct vmbus_msghc *mh;
299 	int error, done = 0;
300 
301 	mh = vmbus_msghc_get(sc, sizeof(*req));
302 	if (mh == NULL)
303 		return ENXIO;
304 
305 	req = vmbus_msghc_dataptr(mh);
306 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
307 	req->chm_ver = version;
308 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
309 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
310 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
311 
312 	error = vmbus_msghc_exec(sc, mh);
313 	if (error) {
314 		vmbus_msghc_put(sc, mh);
315 		return error;
316 	}
317 
318 	msg = vmbus_msghc_wait_result(sc, mh);
319 	done = ((const struct vmbus_chanmsg_connect_resp *)
320 	    msg->msg_data)->chm_done;
321 
322 	vmbus_msghc_put(sc, mh);
323 
324 	return (done ? 0 : EOPNOTSUPP);
325 }
326 
327 static int
328 vmbus_init(struct vmbus_softc *sc)
329 {
330 	int i;
331 
332 	for (i = 0; i < nitems(vmbus_version); ++i) {
333 		int error;
334 
335 		error = vmbus_connect(sc, vmbus_version[i]);
336 		if (!error) {
337 			sc->vmbus_version = vmbus_version[i];
338 			device_printf(sc->vmbus_dev, "version %u.%u\n",
339 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
340 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
341 			return 0;
342 		}
343 	}
344 	return ENXIO;
345 }
346 
347 static void
348 vmbus_disconnect(struct vmbus_softc *sc)
349 {
350 	struct vmbus_chanmsg_disconnect *req;
351 	struct vmbus_msghc *mh;
352 	int error;
353 
354 	mh = vmbus_msghc_get(sc, sizeof(*req));
355 	if (mh == NULL) {
356 		device_printf(sc->vmbus_dev,
357 		    "can not get msg hypercall for disconnect\n");
358 		return;
359 	}
360 
361 	req = vmbus_msghc_dataptr(mh);
362 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
363 
364 	error = vmbus_msghc_exec_noresult(mh);
365 	vmbus_msghc_put(sc, mh);
366 
367 	if (error) {
368 		device_printf(sc->vmbus_dev,
369 		    "disconnect msg hypercall failed\n");
370 	}
371 }
372 
373 static int
374 vmbus_req_channels(struct vmbus_softc *sc)
375 {
376 	struct vmbus_chanmsg_chrequest *req;
377 	struct vmbus_msghc *mh;
378 	int error;
379 
380 	mh = vmbus_msghc_get(sc, sizeof(*req));
381 	if (mh == NULL)
382 		return ENXIO;
383 
384 	req = vmbus_msghc_dataptr(mh);
385 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
386 
387 	error = vmbus_msghc_exec_noresult(mh);
388 	vmbus_msghc_put(sc, mh);
389 
390 	return error;
391 }
392 
393 static void
394 vmbus_scan_done_task(void *xsc, int pending __unused)
395 {
396 	struct vmbus_softc *sc = xsc;
397 
398 	mtx_lock(&Giant);
399 	sc->vmbus_scandone = true;
400 	mtx_unlock(&Giant);
401 	wakeup(&sc->vmbus_scandone);
402 }
403 
404 static void
405 vmbus_scan_done(struct vmbus_softc *sc,
406     const struct vmbus_message *msg __unused)
407 {
408 
409 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
410 }
411 
412 static int
413 vmbus_scan(struct vmbus_softc *sc)
414 {
415 	int error;
416 
417 	/*
418 	 * Identify, probe and attach for non-channel devices.
419 	 */
420 	bus_generic_probe(sc->vmbus_dev);
421 	bus_generic_attach(sc->vmbus_dev);
422 
423 	/*
424 	 * This taskqueue serializes vmbus devices' attach and detach
425 	 * for channel offer and rescind messages.
426 	 */
427 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
428 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
429 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
430 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
431 
432 	/*
433 	 * This taskqueue handles sub-channel detach, so that vmbus
434 	 * device's detach running in vmbus_devtq can drain its sub-
435 	 * channels.
436 	 */
437 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
438 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
439 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
440 
441 	/*
442 	 * Start vmbus scanning.
443 	 */
444 	error = vmbus_req_channels(sc);
445 	if (error) {
446 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
447 		    error);
448 		return (error);
449 	}
450 
451 	/*
452 	 * Wait for all vmbus devices from the initial channel offers to be
453 	 * attached.
454 	 */
455 	GIANT_REQUIRED;
456 	while (!sc->vmbus_scandone)
457 		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);
458 
459 	if (bootverbose) {
460 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
461 		    "done\n");
462 	}
463 	return (0);
464 }
465 
466 static void
467 vmbus_scan_teardown(struct vmbus_softc *sc)
468 {
469 
470 	GIANT_REQUIRED;
471 	if (sc->vmbus_devtq != NULL) {
472 		mtx_unlock(&Giant);
473 		taskqueue_free(sc->vmbus_devtq);
474 		mtx_lock(&Giant);
475 		sc->vmbus_devtq = NULL;
476 	}
477 	if (sc->vmbus_subchtq != NULL) {
478 		mtx_unlock(&Giant);
479 		taskqueue_free(sc->vmbus_subchtq);
480 		mtx_lock(&Giant);
481 		sc->vmbus_subchtq = NULL;
482 	}
483 }
484 
485 static void
486 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
487 {
488 	vmbus_chanmsg_proc_t msg_proc;
489 	uint32_t msg_type;
490 
491 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
492 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
493 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
494 		    msg_type);
495 		return;
496 	}
497 
498 	msg_proc = vmbus_chanmsg_handlers[msg_type];
499 	if (msg_proc != NULL)
500 		msg_proc(sc, msg);
501 
502 	/* Channel specific processing */
503 	vmbus_chan_msgproc(sc, msg);
504 }
505 
506 static void
507 vmbus_msg_task(void *xsc, int pending __unused)
508 {
509 	struct vmbus_softc *sc = xsc;
510 	volatile struct vmbus_message *msg;
511 
512 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
513 	for (;;) {
514 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
515 			/* No message */
516 			break;
517 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
518 			/* Channel message */
519 			vmbus_chanmsg_handle(sc,
520 			    __DEVOLATILE(const struct vmbus_message *, msg));
521 		}
522 
523 		msg->msg_type = HYPERV_MSGTYPE_NONE;
524 		/*
525 		 * Make sure the write to msg_type (i.e. set to
526 		 * HYPERV_MSGTYPE_NONE) happens before we read the
527 		 * msg_flags and EOMing. Otherwise, the EOMing will
528 		 * not deliver any more messages since there is no
529 		 * empty slot
530 		 *
531 		 * NOTE:
532 		 * mb() is used here, since atomic_thread_fence_seq_cst()
533 		 * will become compiler fence on UP kernel.
534 		 */
535 		mb();
536 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
537 			/*
538 			 * This will cause message queue rescan to possibly
539 			 * deliver another msg from the hypervisor
540 			 */
541 			wrmsr(MSR_HV_EOM, 0);
542 		}
543 	}
544 }
545 
546 static __inline int
547 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
548 {
549 	volatile struct vmbus_message *msg;
550 	struct vmbus_message *msg_base;
551 
552 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
553 
554 	/*
555 	 * Check event timer.
556 	 *
557 	 * TODO: move this to independent IDT vector.
558 	 */
559 	msg = msg_base + VMBUS_SINT_TIMER;
560 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
561 		msg->msg_type = HYPERV_MSGTYPE_NONE;
562 
563 		vmbus_et_intr(frame);
564 
565 		/*
566 		 * Make sure the write to msg_type (i.e. set to
567 		 * HYPERV_MSGTYPE_NONE) happens before we read the
568 		 * msg_flags and EOMing. Otherwise, the EOMing will
569 		 * not deliver any more messages since there is no
570 		 * empty slot
571 		 *
572 		 * NOTE:
573 		 * mb() is used here, since atomic_thread_fence_seq_cst()
574 		 * will become compiler fence on UP kernel.
575 		 */
576 		mb();
577 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
578 			/*
579 			 * This will cause message queue rescan to possibly
580 			 * deliver another msg from the hypervisor
581 			 */
582 			wrmsr(MSR_HV_EOM, 0);
583 		}
584 	}
585 
586 	/*
587 	 * Check events.  Hot path for network and storage I/O data; high rate.
588 	 *
589 	 * NOTE:
590 	 * As recommended by the Windows guest fellows, we check events before
591 	 * checking messages.
592 	 */
593 	sc->vmbus_event_proc(sc, cpu);
594 
595 	/*
596 	 * Check messages.  Mainly management stuffs; ultra low rate.
597 	 */
598 	msg = msg_base + VMBUS_SINT_MESSAGE;
599 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
600 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
601 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
602 	}
603 
604 	return (FILTER_HANDLED);
605 }
606 
607 void
608 vmbus_handle_intr(struct trapframe *trap_frame)
609 {
610 	struct vmbus_softc *sc = vmbus_get_softc();
611 	int cpu = curcpu;
612 
613 	/*
614 	 * Disable preemption.
615 	 */
616 	critical_enter();
617 
618 	/*
619 	 * Do a little interrupt counting.
620 	 */
621 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
622 
623 	vmbus_handle_intr1(sc, trap_frame, cpu);
624 
625 	/*
626 	 * Enable preemption.
627 	 */
628 	critical_exit();
629 }
630 
631 static void
632 vmbus_synic_setup(void *xsc)
633 {
634 	struct vmbus_softc *sc = xsc;
635 	int cpu = curcpu;
636 	uint64_t val, orig;
637 	uint32_t sint;
638 
639 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
640 		/* Save virtual processor id. */
641 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
642 	} else {
643 		/* Set virtual processor id to 0 for compatibility. */
644 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
645 	}
646 
647 	/*
648 	 * Setup the SynIC message.
649 	 */
650 	orig = rdmsr(MSR_HV_SIMP);
651 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
652 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
653 	     MSR_HV_SIMP_PGSHIFT);
654 	wrmsr(MSR_HV_SIMP, val);
655 
656 	/*
657 	 * Setup the SynIC event flags.
658 	 */
659 	orig = rdmsr(MSR_HV_SIEFP);
660 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
661 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
662 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
663 	wrmsr(MSR_HV_SIEFP, val);
664 
665 
666 	/*
667 	 * Configure and unmask SINT for message and event flags.
668 	 */
669 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
670 	orig = rdmsr(sint);
671 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
672 	    (orig & MSR_HV_SINT_RSVD_MASK);
673 	wrmsr(sint, val);
674 
675 	/*
676 	 * Configure and unmask SINT for timer.
677 	 */
678 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
679 	orig = rdmsr(sint);
680 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
681 	    (orig & MSR_HV_SINT_RSVD_MASK);
682 	wrmsr(sint, val);
683 
684 	/*
685 	 * All done; enable SynIC.
686 	 */
687 	orig = rdmsr(MSR_HV_SCONTROL);
688 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
689 	wrmsr(MSR_HV_SCONTROL, val);
690 }
691 
692 static void
693 vmbus_synic_teardown(void *arg)
694 {
695 	uint64_t orig;
696 	uint32_t sint;
697 
698 	/*
699 	 * Disable SynIC.
700 	 */
701 	orig = rdmsr(MSR_HV_SCONTROL);
702 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
703 
704 	/*
705 	 * Mask message and event flags SINT.
706 	 */
707 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
708 	orig = rdmsr(sint);
709 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
710 
711 	/*
712 	 * Mask timer SINT.
713 	 */
714 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
715 	orig = rdmsr(sint);
716 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
717 
718 	/*
719 	 * Teardown SynIC message.
720 	 */
721 	orig = rdmsr(MSR_HV_SIMP);
722 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
723 
724 	/*
725 	 * Teardown SynIC event flags.
726 	 */
727 	orig = rdmsr(MSR_HV_SIEFP);
728 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
729 }
730 
731 static int
732 vmbus_dma_alloc(struct vmbus_softc *sc)
733 {
734 	bus_dma_tag_t parent_dtag;
735 	uint8_t *evtflags;
736 	int cpu;
737 
738 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
739 	CPU_FOREACH(cpu) {
740 		void *ptr;
741 
742 		/*
743 		 * Per-cpu messages and event flags.
744 		 */
745 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
746 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
747 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
748 		if (ptr == NULL)
749 			return ENOMEM;
750 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
751 
752 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
753 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
754 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
755 		if (ptr == NULL)
756 			return ENOMEM;
757 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
758 	}
759 
760 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
761 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
762 	if (evtflags == NULL)
763 		return ENOMEM;
764 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
765 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
766 	sc->vmbus_evtflags = evtflags;
767 
768 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
769 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
770 	if (sc->vmbus_mnf1 == NULL)
771 		return ENOMEM;
772 
773 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
774 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
775 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
776 	if (sc->vmbus_mnf2 == NULL)
777 		return ENOMEM;
778 
779 	return 0;
780 }
781 
782 static void
783 vmbus_dma_free(struct vmbus_softc *sc)
784 {
785 	int cpu;
786 
787 	if (sc->vmbus_evtflags != NULL) {
788 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
789 		sc->vmbus_evtflags = NULL;
790 		sc->vmbus_rx_evtflags = NULL;
791 		sc->vmbus_tx_evtflags = NULL;
792 	}
793 	if (sc->vmbus_mnf1 != NULL) {
794 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
795 		sc->vmbus_mnf1 = NULL;
796 	}
797 	if (sc->vmbus_mnf2 != NULL) {
798 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
799 		sc->vmbus_mnf2 = NULL;
800 	}
801 
802 	CPU_FOREACH(cpu) {
803 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
804 			hyperv_dmamem_free(
805 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
806 			    VMBUS_PCPU_GET(sc, message, cpu));
807 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
808 		}
809 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
810 			hyperv_dmamem_free(
811 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
812 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
813 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
814 		}
815 	}
816 }
817 
818 static int
819 vmbus_intr_setup(struct vmbus_softc *sc)
820 {
821 	int cpu;
822 
823 	CPU_FOREACH(cpu) {
824 		char buf[MAXCOMLEN + 1];
825 		cpuset_t cpu_mask;
826 
827 		/* Allocate an interrupt counter for Hyper-V interrupt */
828 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
829 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
830 
831 		/*
832 		 * Setup taskqueue to handle events.  Task will be per-
833 		 * channel.
834 		 */
835 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
836 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
837 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
838 		CPU_SETOF(cpu, &cpu_mask);
839 		taskqueue_start_threads_cpuset(
840 		    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask,
841 		    "hvevent%d", cpu);
842 
843 		/*
844 		 * Setup tasks and taskqueues to handle messages.
845 		 */
846 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
847 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
848 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
849 		CPU_SETOF(cpu, &cpu_mask);
850 		taskqueue_start_threads_cpuset(
851 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
852 		    "hvmsg%d", cpu);
853 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
854 		    vmbus_msg_task, sc);
855 	}
856 
857 	/*
858 	 * All Hyper-V ISR required resources are setup, now let's find a
859 	 * free IDT vector for Hyper-V ISR and set it up.
860 	 */
861 	sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr));
862 	if (sc->vmbus_idtvec < 0) {
863 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
864 		return ENXIO;
865 	}
866 	if(bootverbose) {
867 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
868 		    sc->vmbus_idtvec);
869 	}
870 	return 0;
871 }
872 
873 static void
874 vmbus_intr_teardown(struct vmbus_softc *sc)
875 {
876 	int cpu;
877 
878 	if (sc->vmbus_idtvec >= 0) {
879 		lapic_ipi_free(sc->vmbus_idtvec);
880 		sc->vmbus_idtvec = -1;
881 	}
882 
883 	CPU_FOREACH(cpu) {
884 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
885 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
886 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
887 		}
888 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
889 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
890 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
891 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
892 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
893 		}
894 	}
895 }
896 
897 static int
898 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
899 {
900 	return (ENOENT);
901 }
902 
903 static int
904 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
905 {
906 	const struct vmbus_channel *chan;
907 	char guidbuf[HYPERV_GUID_STRLEN];
908 
909 	chan = vmbus_get_channel(child);
910 	if (chan == NULL) {
911 		/* Event timer device, which does not belong to a channel */
912 		return (0);
913 	}
914 
915 	strlcat(buf, "classid=", buflen);
916 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
917 	strlcat(buf, guidbuf, buflen);
918 
919 	strlcat(buf, " deviceid=", buflen);
920 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
921 	strlcat(buf, guidbuf, buflen);
922 
923 	return (0);
924 }
925 
926 int
927 vmbus_add_child(struct vmbus_channel *chan)
928 {
929 	struct vmbus_softc *sc = chan->ch_vmbus;
930 	device_t parent = sc->vmbus_dev;
931 
932 	mtx_lock(&Giant);
933 
934 	chan->ch_dev = device_add_child(parent, NULL, -1);
935 	if (chan->ch_dev == NULL) {
936 		mtx_unlock(&Giant);
937 		device_printf(parent, "device_add_child for chan%u failed\n",
938 		    chan->ch_id);
939 		return (ENXIO);
940 	}
941 	device_set_ivars(chan->ch_dev, chan);
942 	device_probe_and_attach(chan->ch_dev);
943 
944 	mtx_unlock(&Giant);
945 	return (0);
946 }
947 
948 int
949 vmbus_delete_child(struct vmbus_channel *chan)
950 {
951 	int error = 0;
952 
953 	mtx_lock(&Giant);
954 	if (chan->ch_dev != NULL) {
955 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
956 		    chan->ch_dev);
957 	}
958 	mtx_unlock(&Giant);
959 	return (error);
960 }
961 
962 static int
963 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
964 {
965 	struct vmbus_softc *sc = arg1;
966 	char verstr[16];
967 
968 	snprintf(verstr, sizeof(verstr), "%u.%u",
969 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
970 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
971 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
972 }
973 
974 static uint32_t
975 vmbus_get_version_method(device_t bus, device_t dev)
976 {
977 	struct vmbus_softc *sc = device_get_softc(bus);
978 
979 	return sc->vmbus_version;
980 }
981 
982 static int
983 vmbus_probe_guid_method(device_t bus, device_t dev,
984     const struct hyperv_guid *guid)
985 {
986 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
987 
988 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
989 		return 0;
990 	return ENXIO;
991 }
992 
993 static int
994 vmbus_probe(device_t dev)
995 {
996 	char *id[] = { "VMBUS", NULL };
997 
998 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
999 	    device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1000 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1001 		return (ENXIO);
1002 
1003 	device_set_desc(dev, "Hyper-V Vmbus");
1004 
1005 	return (BUS_PROBE_DEFAULT);
1006 }
1007 
1008 /**
1009  * @brief Main vmbus driver initialization routine.
1010  *
1011  * Here, we
1012  * - initialize the vmbus driver context
1013  * - setup various driver entry points
1014  * - invoke the vmbus hv main init routine
1015  * - get the irq resource
1016  * - invoke the vmbus to add the vmbus root device
1017  * - setup the vmbus root device
1018  * - retrieve the channel offers
1019  */
1020 static int
1021 vmbus_doattach(struct vmbus_softc *sc)
1022 {
1023 	struct sysctl_oid_list *child;
1024 	struct sysctl_ctx_list *ctx;
1025 	int ret;
1026 
1027 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1028 		return (0);
1029 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1030 
1031 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1032 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1033 	TAILQ_INIT(&sc->vmbus_prichans);
1034 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1035 	TAILQ_INIT(&sc->vmbus_chans);
1036 	sc->vmbus_chmap = malloc(
1037 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1038 	    M_WAITOK | M_ZERO);
1039 
1040 	/*
1041 	 * Create context for "post message" Hypercalls
1042 	 */
1043 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1044 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1045 	    sizeof(struct vmbus_msghc));
1046 	if (sc->vmbus_xc == NULL) {
1047 		ret = ENXIO;
1048 		goto cleanup;
1049 	}
1050 
1051 	/*
1052 	 * Allocate DMA stuffs.
1053 	 */
1054 	ret = vmbus_dma_alloc(sc);
1055 	if (ret != 0)
1056 		goto cleanup;
1057 
1058 	/*
1059 	 * Setup interrupt.
1060 	 */
1061 	ret = vmbus_intr_setup(sc);
1062 	if (ret != 0)
1063 		goto cleanup;
1064 
1065 	/*
1066 	 * Setup SynIC.
1067 	 */
1068 	if (bootverbose)
1069 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1070 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1071 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1072 
1073 	/*
1074 	 * Initialize vmbus, e.g. connect to Hypervisor.
1075 	 */
1076 	ret = vmbus_init(sc);
1077 	if (ret != 0)
1078 		goto cleanup;
1079 
1080 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1081 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1082 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1083 	else
1084 		sc->vmbus_event_proc = vmbus_event_proc;
1085 
1086 	ret = vmbus_scan(sc);
1087 	if (ret != 0)
1088 		goto cleanup;
1089 
1090 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1091 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1092 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1093 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1094 	    vmbus_sysctl_version, "A", "vmbus version");
1095 
1096 	return (ret);
1097 
1098 cleanup:
1099 	vmbus_scan_teardown(sc);
1100 	vmbus_intr_teardown(sc);
1101 	vmbus_dma_free(sc);
1102 	if (sc->vmbus_xc != NULL) {
1103 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1104 		sc->vmbus_xc = NULL;
1105 	}
1106 	free(sc->vmbus_chmap, M_DEVBUF);
1107 	mtx_destroy(&sc->vmbus_prichan_lock);
1108 	mtx_destroy(&sc->vmbus_chan_lock);
1109 
1110 	return (ret);
1111 }
1112 
1113 static void
1114 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1115 {
1116 }
1117 
1118 static int
1119 vmbus_attach(device_t dev)
1120 {
1121 	vmbus_sc = device_get_softc(dev);
1122 	vmbus_sc->vmbus_dev = dev;
1123 	vmbus_sc->vmbus_idtvec = -1;
1124 
1125 	/*
1126 	 * Event processing logic will be configured:
1127 	 * - After the vmbus protocol version negotiation.
1128 	 * - Before we request channel offers.
1129 	 */
1130 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1131 
1132 #ifndef EARLY_AP_STARTUP
1133 	/*
1134 	 * If the system has already booted and thread
1135 	 * scheduling is possible indicated by the global
1136 	 * cold set to zero, we just call the driver
1137 	 * initialization directly.
1138 	 */
1139 	if (!cold)
1140 #endif
1141 		vmbus_doattach(vmbus_sc);
1142 
1143 	return (0);
1144 }
1145 
1146 static int
1147 vmbus_detach(device_t dev)
1148 {
1149 	struct vmbus_softc *sc = device_get_softc(dev);
1150 
1151 	bus_generic_detach(dev);
1152 	vmbus_chan_destroy_all(sc);
1153 
1154 	vmbus_scan_teardown(sc);
1155 
1156 	vmbus_disconnect(sc);
1157 
1158 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1159 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1160 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1161 	}
1162 
1163 	vmbus_intr_teardown(sc);
1164 	vmbus_dma_free(sc);
1165 
1166 	if (sc->vmbus_xc != NULL) {
1167 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1168 		sc->vmbus_xc = NULL;
1169 	}
1170 
1171 	free(sc->vmbus_chmap, M_DEVBUF);
1172 	mtx_destroy(&sc->vmbus_prichan_lock);
1173 	mtx_destroy(&sc->vmbus_chan_lock);
1174 
1175 	return (0);
1176 }
1177 
1178 #ifndef EARLY_AP_STARTUP
1179 
1180 static void
1181 vmbus_sysinit(void *arg __unused)
1182 {
1183 	struct vmbus_softc *sc = vmbus_get_softc();
1184 
1185 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1186 		return;
1187 
1188 	/*
1189 	 * If the system has already booted and thread
1190 	 * scheduling is possible, as indicated by the
1191 	 * global cold set to zero, we just call the driver
1192 	 * initialization directly.
1193 	 */
1194 	if (!cold)
1195 		vmbus_doattach(sc);
1196 }
1197 /*
1198  * NOTE:
1199  * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1200  * initialized.
1201  */
1202 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1203 
1204 #endif	/* !EARLY_AP_STARTUP */
1205