xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision 9cbf1de7e34a6fced041388fad5d9180cb7705fe)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/linker.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/mutex.h>
41 #include <sys/sbuf.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_extern.h>
49 #include <vm/vm_param.h>
50 #include <vm/pmap.h>
51 
52 #include <machine/bus.h>
53 #if defined(__aarch64__)
54 #include <dev/psci/smccc.h>
55 #include <dev/hyperv/vmbus/aarch64/hyperv_machdep.h>
56 #include <dev/hyperv/vmbus/aarch64/hyperv_reg.h>
57 #else
58 #include <dev/hyperv/vmbus/x86/hyperv_machdep.h>
59 #include <dev/hyperv/vmbus/x86/hyperv_reg.h>
60 #include <machine/intr_machdep.h>
61 #include <x86/include/apicvar.h>
62 #endif
63 #include <machine/metadata.h>
64 #include <machine/md_var.h>
65 #include <machine/resource.h>
66 #include <contrib/dev/acpica/include/acpi.h>
67 #include <dev/acpica/acpivar.h>
68 
69 #include <dev/hyperv/include/hyperv.h>
70 #include <dev/hyperv/include/vmbus_xact.h>
71 #include <dev/hyperv/vmbus/hyperv_var.h>
72 #include <dev/hyperv/vmbus/vmbus_reg.h>
73 #include <dev/hyperv/vmbus/vmbus_var.h>
74 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
75 #include <dev/hyperv/vmbus/hyperv_common_reg.h>
76 #include "acpi_if.h"
77 #include "pcib_if.h"
78 #include "vmbus_if.h"
79 
/* NOTE(review): presumably the initial GPADL id value; its use is not visible in this part of the file. */
#define VMBUS_GPADL_START		0xe1e10

/*
 * Message hypercall context.  It lives in the private area of a vmbus
 * transaction (see vmbus_msghc_get()).
 */
struct vmbus_msghc {
	/* Transaction that owns this context and the request buffer. */
	struct vmbus_xact		*mh_xact;
	/*
	 * Copy of the post-message input parameter, taken before the
	 * hypercall is issued and restored before every retry.
	 */
	struct hypercall_postmsg_in	mh_inprm_save;
};
86 
/* Device, bus, pcib and vmbus interface methods (see vmbus_methods[]). */
static void			vmbus_identify(driver_t *, device_t);
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static int			vmbus_read_ivar(device_t, device_t, int,
				    uintptr_t *);
static int			vmbus_child_pnpinfo(device_t, device_t, struct sbuf *);
static struct resource		*vmbus_alloc_resource(device_t dev,
				    device_t child, int type, int *rid,
				    rman_res_t start, rman_res_t end,
				    rman_res_t count, u_int flags);
static int			vmbus_alloc_msi(device_t bus, device_t dev,
				    int count, int maxcount, int *irqs);
static int			vmbus_release_msi(device_t bus, device_t dev,
				    int count, int *irqs);
static int			vmbus_alloc_msix(device_t bus, device_t dev,
				    int *irq);
static int			vmbus_release_msix(device_t bus, device_t dev,
				    int irq);
static int			vmbus_map_msi(device_t bus, device_t dev,
				    int irq, uint64_t *addr, uint32_t *data);
static uint32_t			vmbus_get_version_method(device_t, device_t);
static int			vmbus_probe_guid_method(device_t, device_t,
				    const struct hyperv_guid *);
static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
				    device_t dev, int cpu);
static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
				    int);
#if defined(EARLY_AP_STARTUP)
static void			vmbus_intrhook(void *);
#endif

/* Internal helpers: connection, scanning, messages, SynIC, DMA, interrupts. */
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_connect(struct vmbus_softc *, uint32_t);
static int			vmbus_req_channels(struct vmbus_softc *sc);
static void			vmbus_disconnect(struct vmbus_softc *);
static int			vmbus_scan(struct vmbus_softc *);
static void			vmbus_scan_teardown(struct vmbus_softc *);
static void			vmbus_scan_done(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_chanmsg_handle(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_msg_task(void *, int);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static int			vmbus_doattach(struct vmbus_softc *);
static void			vmbus_event_proc_dummy(struct vmbus_softc *,
				    int);
static bus_dma_tag_t	vmbus_get_dma_tag(device_t parent, device_t child);
/* Global softc pointer, returned by vmbus_get_softc(). */
static struct vmbus_softc	*vmbus_sc;
#if defined(__x86_64__)
static int vmbus_alloc_cpu_mem(struct vmbus_softc *sc);
static void vmbus_free_cpu_mem(struct vmbus_softc *sc);
#endif
146 
/* Root of the hw.vmbus sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V vmbus");

/*
 * hw.vmbus.pin_evttask: when non-zero, vmbus_intr_setup() starts each
 * per-CPU event taskqueue thread with a cpuset containing only that CPU.
 */
static int			vmbus_pin_evttask = 1;
SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
    &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
153 
154 #if defined(__x86_64__)
155 static int			hv_tlb_hcall = 1;
156 SYSCTL_INT(_hw_vmbus, OID_AUTO, tlb_hcall , CTLFLAG_RDTUN,
157     &hv_tlb_hcall, 0, "Use Hyper_V hyercall for tlb flush");
158 #endif
159 
/* Protocol version that vmbus_init() managed to connect with. */
uint32_t			vmbus_current_version;

/*
 * Protocol versions offered to the host.  vmbus_init() tries them in
 * array order and keeps the first one for which vmbus_connect() succeeds.
 */
static const uint32_t		vmbus_version[] = {
	VMBUS_VERSION_WIN10,
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};
169 
/*
 * Channel message handlers, indexed by message type in
 * vmbus_chanmsg_handle(); types without an entry are NULL and are skipped.
 * NOTE(review): the VMBUS_CHANMSG_PROC* macros are defined elsewhere;
 * presumably the _WAKEUP variant installs vmbus_msghc_wakeup -- confirm.
 */
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
175 
/*
 * Method table for the vmbus driver.  Most bus methods use the generic
 * implementations; the pcib methods are implemented below by forwarding
 * to the parent of the vmbus device.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,		vmbus_identify),
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_child_pnpinfo,		vmbus_child_pnpinfo),
	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
	DEVMETHOD(bus_get_dma_tag,		vmbus_get_dma_tag),

	/* pcib interface */
	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),

	/* Vmbus interface */
	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),

	DEVMETHOD_END
};
215 
/* Driver description: name, method table and softc size. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

/* Largest virtual processor id recorded by vmbus_synic_setup(). */
uint32_t hv_max_vp_index;

/* The driver is registered on both the pcib and acpi_syscontainer buses. */
DRIVER_MODULE(vmbus, pcib, vmbus_driver, NULL, NULL);
DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, NULL, NULL);

MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_DEPEND(vmbus, pci, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
230 
231 static __inline struct vmbus_softc *
232 vmbus_get_softc(void)
233 {
234 	return vmbus_sc;
235 }
236 
237 static bus_dma_tag_t
238 vmbus_get_dma_tag(device_t dev, device_t child)
239 {
240 	struct vmbus_softc *sc = vmbus_get_softc();
241 	return (sc->dmat);
242 }
243 
244 void
245 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
246 {
247 	struct hypercall_postmsg_in *inprm;
248 
249 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
250 		panic("invalid data size %zu", dsize);
251 
252 	inprm = vmbus_xact_req_data(mh->mh_xact);
253 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
254 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
255 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
256 	inprm->hc_dsize = dsize;
257 }
258 
259 struct vmbus_msghc *
260 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
261 {
262 	struct vmbus_msghc *mh;
263 	struct vmbus_xact *xact;
264 
265 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
266 		panic("invalid data size %zu", dsize);
267 
268 	xact = vmbus_xact_get(sc->vmbus_xc,
269 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
270 	if (xact == NULL)
271 		return (NULL);
272 
273 	mh = vmbus_xact_priv(xact, sizeof(*mh));
274 	mh->mh_xact = xact;
275 
276 	vmbus_msghc_reset(mh, dsize);
277 	return (mh);
278 }
279 
280 void
281 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
282 {
283 
284 	vmbus_xact_put(mh->mh_xact);
285 }
286 
287 void *
288 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
289 {
290 	struct hypercall_postmsg_in *inprm;
291 
292 	inprm = vmbus_xact_req_data(mh->mh_xact);
293 	return (inprm->hc_data);
294 }
295 
296 int
297 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
298 {
299 	sbintime_t time = SBT_1MS;
300 	struct hypercall_postmsg_in *inprm;
301 	bus_addr_t inprm_paddr;
302 	int i;
303 
304 	inprm = vmbus_xact_req_data(mh->mh_xact);
305 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
306 
307 	/*
308 	 * Save the input parameter so that we could restore the input
309 	 * parameter if the Hypercall failed.
310 	 *
311 	 * XXX
312 	 * Is this really necessary?!  i.e. Will the Hypercall ever
313 	 * overwrite the input parameter?
314 	 */
315 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
316 
317 	/*
318 	 * In order to cope with transient failures, e.g. insufficient
319 	 * resources on host side, we retry the post message Hypercall
320 	 * several times.  20 retries seem sufficient.
321 	 */
322 #define HC_RETRY_MAX	20
323 
324 	for (i = 0; i < HC_RETRY_MAX; ++i) {
325 		uint64_t status;
326 
327 		status = hypercall_post_message(inprm_paddr);
328 		if (status == HYPERCALL_STATUS_SUCCESS)
329 			return 0;
330 
331 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
332 		if (time < SBT_1S * 2)
333 			time *= 2;
334 
335 		/* Restore input parameter and try again */
336 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
337 	}
338 
339 #undef HC_RETRY_MAX
340 
341 	return EIO;
342 }
343 
344 int
345 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
346 {
347 	int error;
348 
349 	vmbus_xact_activate(mh->mh_xact);
350 	error = vmbus_msghc_exec_noresult(mh);
351 	if (error)
352 		vmbus_xact_deactivate(mh->mh_xact);
353 	return error;
354 }
355 
356 void
357 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
358 {
359 
360 	vmbus_xact_deactivate(mh->mh_xact);
361 }
362 
363 const struct vmbus_message *
364 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
365 {
366 	size_t resp_len;
367 
368 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
369 }
370 
371 const struct vmbus_message *
372 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
373 {
374 	size_t resp_len;
375 
376 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
377 }
378 
379 void
380 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
381 {
382 
383 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
384 }
385 
386 uint32_t
387 vmbus_gpadl_alloc(struct vmbus_softc *sc)
388 {
389 	uint32_t gpadl;
390 
391 again:
392 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
393 	if (gpadl == 0)
394 		goto again;
395 	return (gpadl);
396 }
397 
398 /* Used for Hyper-V socket when guest client connects to host */
399 int
400 vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
401     struct hyperv_guid *host_srv_id)
402 {
403 	struct vmbus_softc *sc = vmbus_get_softc();
404 	struct vmbus_chanmsg_tl_connect *req;
405 	struct vmbus_msghc *mh;
406 	int error;
407 
408 	if (!sc)
409 		return ENXIO;
410 
411 	mh = vmbus_msghc_get(sc, sizeof(*req));
412 	if (mh == NULL) {
413 		device_printf(sc->vmbus_dev,
414 		    "can not get msg hypercall for tl connect\n");
415 		return ENXIO;
416 	}
417 
418 	req = vmbus_msghc_dataptr(mh);
419 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
420 	req->guest_endpoint_id = *guest_srv_id;
421 	req->host_service_id = *host_srv_id;
422 
423 	error = vmbus_msghc_exec_noresult(mh);
424 	vmbus_msghc_put(sc, mh);
425 
426 	if (error) {
427 		device_printf(sc->vmbus_dev,
428 		    "tl connect msg hypercall failed\n");
429 	}
430 
431 	return error;
432 }
433 
434 static int
435 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
436 {
437 	struct vmbus_chanmsg_connect *req;
438 	const struct vmbus_message *msg;
439 	struct vmbus_msghc *mh;
440 	int error, done = 0;
441 
442 	mh = vmbus_msghc_get(sc, sizeof(*req));
443 	if (mh == NULL)
444 		return ENXIO;
445 
446 	req = vmbus_msghc_dataptr(mh);
447 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
448 	req->chm_ver = version;
449 	req->chm_evtflags = pmap_kextract((vm_offset_t)sc->vmbus_evtflags);
450 	req->chm_mnf1 = pmap_kextract((vm_offset_t)sc->vmbus_mnf1);
451 	req->chm_mnf2 = pmap_kextract((vm_offset_t)sc->vmbus_mnf2);
452 
453 	error = vmbus_msghc_exec(sc, mh);
454 	if (error) {
455 		vmbus_msghc_put(sc, mh);
456 		return error;
457 	}
458 
459 	msg = vmbus_msghc_wait_result(sc, mh);
460 	done = ((const struct vmbus_chanmsg_connect_resp *)
461 	    msg->msg_data)->chm_done;
462 
463 	vmbus_msghc_put(sc, mh);
464 
465 	return (done ? 0 : EOPNOTSUPP);
466 }
467 
468 static int
469 vmbus_init(struct vmbus_softc *sc)
470 {
471 	int i;
472 
473 	for (i = 0; i < nitems(vmbus_version); ++i) {
474 		int error;
475 
476 		error = vmbus_connect(sc, vmbus_version[i]);
477 		if (!error) {
478 			vmbus_current_version = vmbus_version[i];
479 			sc->vmbus_version = vmbus_version[i];
480 			device_printf(sc->vmbus_dev, "version %u.%u\n",
481 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
482 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
483 			return 0;
484 		}
485 	}
486 	return ENXIO;
487 }
488 
489 static void
490 vmbus_disconnect(struct vmbus_softc *sc)
491 {
492 	struct vmbus_chanmsg_disconnect *req;
493 	struct vmbus_msghc *mh;
494 	int error;
495 
496 	mh = vmbus_msghc_get(sc, sizeof(*req));
497 	if (mh == NULL) {
498 		device_printf(sc->vmbus_dev,
499 		    "can not get msg hypercall for disconnect\n");
500 		return;
501 	}
502 
503 	req = vmbus_msghc_dataptr(mh);
504 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
505 
506 	error = vmbus_msghc_exec_noresult(mh);
507 	vmbus_msghc_put(sc, mh);
508 
509 	if (error) {
510 		device_printf(sc->vmbus_dev,
511 		    "disconnect msg hypercall failed\n");
512 	}
513 }
514 
515 static int
516 vmbus_req_channels(struct vmbus_softc *sc)
517 {
518 	struct vmbus_chanmsg_chrequest *req;
519 	struct vmbus_msghc *mh;
520 	int error;
521 
522 	mh = vmbus_msghc_get(sc, sizeof(*req));
523 	if (mh == NULL)
524 		return ENXIO;
525 
526 	req = vmbus_msghc_dataptr(mh);
527 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
528 
529 	error = vmbus_msghc_exec_noresult(mh);
530 	vmbus_msghc_put(sc, mh);
531 
532 	return error;
533 }
534 
535 static void
536 vmbus_scan_done_task(void *xsc, int pending __unused)
537 {
538 	struct vmbus_softc *sc = xsc;
539 
540 	bus_topo_lock();
541 	sc->vmbus_scandone = true;
542 	bus_topo_unlock();
543 	wakeup(&sc->vmbus_scandone);
544 }
545 
546 static void
547 vmbus_scan_done(struct vmbus_softc *sc,
548     const struct vmbus_message *msg __unused)
549 {
550 
551 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
552 }
553 
554 static int
555 vmbus_scan(struct vmbus_softc *sc)
556 {
557 	int error;
558 
559 	/*
560 	 * Identify, probe and attach for non-channel devices.
561 	 */
562 	bus_generic_probe(sc->vmbus_dev);
563 	bus_generic_attach(sc->vmbus_dev);
564 
565 	/*
566 	 * This taskqueue serializes vmbus devices' attach and detach
567 	 * for channel offer and rescind messages.
568 	 */
569 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
570 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
571 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
572 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
573 
574 	/*
575 	 * This taskqueue handles sub-channel detach, so that vmbus
576 	 * device's detach running in vmbus_devtq can drain its sub-
577 	 * channels.
578 	 */
579 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
580 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
581 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
582 
583 	/*
584 	 * Start vmbus scanning.
585 	 */
586 	error = vmbus_req_channels(sc);
587 	if (error) {
588 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
589 		    error);
590 		return (error);
591 	}
592 
593 	/*
594 	 * Wait for all vmbus devices from the initial channel offers to be
595 	 * attached.
596 	 */
597 	bus_topo_assert();
598 	while (!sc->vmbus_scandone)
599 		mtx_sleep(&sc->vmbus_scandone, bus_topo_mtx(), 0, "vmbusdev", 0);
600 
601 	if (bootverbose) {
602 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
603 		    "done\n");
604 	}
605 	return (0);
606 }
607 
608 static void
609 vmbus_scan_teardown(struct vmbus_softc *sc)
610 {
611 
612 	bus_topo_assert();
613 	if (sc->vmbus_devtq != NULL) {
614 		bus_topo_unlock();
615 		taskqueue_free(sc->vmbus_devtq);
616 		bus_topo_lock();
617 		sc->vmbus_devtq = NULL;
618 	}
619 	if (sc->vmbus_subchtq != NULL) {
620 		bus_topo_unlock();
621 		taskqueue_free(sc->vmbus_subchtq);
622 		bus_topo_lock();
623 		sc->vmbus_subchtq = NULL;
624 	}
625 }
626 
627 static void
628 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
629 {
630 	vmbus_chanmsg_proc_t msg_proc;
631 	uint32_t msg_type;
632 
633 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
634 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
635 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
636 		    msg_type);
637 		return;
638 	}
639 
640 	msg_proc = vmbus_chanmsg_handlers[msg_type];
641 	if (msg_proc != NULL)
642 		msg_proc(sc, msg);
643 
644 	/* Channel specific processing */
645 	vmbus_chan_msgproc(sc, msg);
646 }
647 
648 static void
649 vmbus_msg_task(void *xsc, int pending __unused)
650 {
651 	struct vmbus_softc *sc = xsc;
652 	volatile struct vmbus_message *msg;
653 
654 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
655 	for (;;) {
656 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
657 			/* No message */
658 			break;
659 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
660 			/* Channel message */
661 			vmbus_chanmsg_handle(sc,
662 			    __DEVOLATILE(const struct vmbus_message *, msg));
663 		}
664 
665 		msg->msg_type = HYPERV_MSGTYPE_NONE;
666 		/*
667 		 * Make sure the write to msg_type (i.e. set to
668 		 * HYPERV_MSGTYPE_NONE) happens before we read the
669 		 * msg_flags and EOMing. Otherwise, the EOMing will
670 		 * not deliver any more messages since there is no
671 		 * empty slot
672 		 *
673 		 * NOTE:
674 		 * mb() is used here, since atomic_thread_fence_seq_cst()
675 		 * will become compiler fence on UP kernel.
676 		 */
677 		mb();
678 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
679 			/*
680 			 * This will cause message queue rescan to possibly
681 			 * deliver another msg from the hypervisor
682 			 */
683 			WRMSR(MSR_HV_EOM, 0);
684 		}
685 	}
686 }
687 static __inline int
688 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
689 {
690 	volatile struct vmbus_message *msg;
691 	struct vmbus_message *msg_base;
692 
693 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
694 
695 	/*
696 	 * Check event timer.
697 	 *
698 	 * TODO: move this to independent IDT vector.
699 	 */
700 	vmbus_handle_timer_intr1(msg_base, frame);
701 	/*
702 	 * Check events.  Hot path for network and storage I/O data; high rate.
703 	 *
704 	 * NOTE:
705 	 * As recommended by the Windows guest fellows, we check events before
706 	 * checking messages.
707 	 */
708 	sc->vmbus_event_proc(sc, cpu);
709 
710 	/*
711 	 * Check messages.  Mainly management stuffs; ultra low rate.
712 	 */
713 	msg = msg_base + VMBUS_SINT_MESSAGE;
714 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
715 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
716 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
717 	}
718 
719 	return (FILTER_HANDLED);
720 }
721 
722 void
723 vmbus_handle_intr(struct trapframe *trap_frame)
724 {
725 	struct vmbus_softc *sc = vmbus_get_softc();
726 	int cpu = curcpu;
727 
728 	/*
729 	 * Disable preemption.
730 	 */
731 	critical_enter();
732 
733 	/*
734 	 * Do a little interrupt counting. This used x86 specific
735 	 * intrcnt_add function
736 	 */
737 #if !defined(__aarch64__)
738 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
739 #endif /* not for aarch64 */
740 	vmbus_handle_intr1(sc, trap_frame, cpu);
741 
742 	/*
743 	 * Enable preemption.
744 	 */
745 	critical_exit();
746 }
747 
748 static void
749 vmbus_synic_setup(void *xsc)
750 {
751 	struct vmbus_softc *sc = xsc;
752 	int cpu = curcpu;
753 	uint64_t val, orig;
754 	uint32_t sint;
755 
756 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
757 		/* Save virtual processor id. */
758 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = RDMSR(MSR_HV_VP_INDEX);
759 	} else {
760 		/* Set virtual processor id to 0 for compatibility. */
761 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
762 	}
763 
764 	if (VMBUS_PCPU_GET(sc, vcpuid, cpu) > hv_max_vp_index)
765 		hv_max_vp_index = VMBUS_PCPU_GET(sc, vcpuid, cpu);
766 
767 	/*
768 	 * Setup the SynIC message.
769 	 */
770 	orig = RDMSR(MSR_HV_SIMP);
771 	val = pmap_kextract((vm_offset_t)VMBUS_PCPU_GET(sc, message, cpu)) &
772 	    MSR_HV_SIMP_PGMASK;
773 	val |= MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK);
774 	WRMSR(MSR_HV_SIMP, val);
775 	/*
776 	 * Setup the SynIC event flags.
777 	 */
778 	orig = RDMSR(MSR_HV_SIEFP);
779 	val = pmap_kextract((vm_offset_t)VMBUS_PCPU_GET(sc, event_flags, cpu)) &
780 	    MSR_HV_SIMP_PGMASK;
781 	val |= MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK);
782 	WRMSR(MSR_HV_SIEFP, val);
783 
784 	/*
785 	 * Configure and unmask SINT for message and event flags.
786 	 */
787 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
788 	orig = RDMSR(sint);
789 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
790 	    (orig & MSR_HV_SINT_RSVD_MASK);
791 	WRMSR(sint, val);
792 
793 	/*
794 	 * Configure and unmask SINT for timer.
795 	 */
796 	vmbus_synic_setup1(sc);
797 	/*
798 	 * All done; enable SynIC.
799 	 */
800 	orig = RDMSR(MSR_HV_SCONTROL);
801 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
802 	WRMSR(MSR_HV_SCONTROL, val);
803 }
804 
805 #if defined(__x86_64__)
806 void
807 hyperv_vm_tlb_flush(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
808     smp_invl_local_cb_t curcpu_cb, enum invl_op_codes op)
809 {
810 	struct vmbus_softc *sc = vmbus_get_softc();
811 	return hv_vm_tlb_flush(pmap, addr1, addr2, op, sc, curcpu_cb);
812 }
813 #endif /*__x86_64__*/
814 
815 static void
816 vmbus_synic_teardown(void *arg)
817 {
818 	uint64_t orig;
819 	uint32_t sint;
820 
821 	/*
822 	 * Disable SynIC.
823 	 */
824 	orig = RDMSR(MSR_HV_SCONTROL);
825 	WRMSR(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
826 
827 	/*
828 	 * Mask message and event flags SINT.
829 	 */
830 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
831 	orig = RDMSR(sint);
832 	WRMSR(sint, orig | MSR_HV_SINT_MASKED);
833 
834 	/*
835 	 * Mask timer SINT.
836 	 */
837 	vmbus_synic_teardown1();
838 	/*
839 	 * Teardown SynIC message.
840 	 */
841 	orig = RDMSR(MSR_HV_SIMP);
842 	WRMSR(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
843 
844 	/*
845 	 * Teardown SynIC event flags.
846 	 */
847 	orig = RDMSR(MSR_HV_SIEFP);
848 	WRMSR(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
849 }
850 
851 static int
852 vmbus_dma_alloc(struct vmbus_softc *sc)
853 {
854 	uint8_t *evtflags;
855 	int cpu;
856 
857 	CPU_FOREACH(cpu) {
858 		void *ptr;
859 
860 		/*
861 		 * Per-cpu messages and event flags.
862 		 */
863 		ptr = contigmalloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
864 		    0ul, ~0ul, PAGE_SIZE, 0);
865 		if (ptr == NULL)
866 			return ENOMEM;
867 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
868 
869 		ptr = contigmalloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
870 		    0ul, ~0ul, PAGE_SIZE, 0);
871 		if (ptr == NULL)
872 			return ENOMEM;
873 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
874 	}
875 
876 	evtflags = contigmalloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
877 	    0ul, ~0ul, PAGE_SIZE, 0);
878 	if (evtflags == NULL)
879 		return ENOMEM;
880 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
881 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
882 	sc->vmbus_evtflags = evtflags;
883 
884 	sc->vmbus_mnf1 = contigmalloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO,
885 	    0ul, ~0ul, PAGE_SIZE, 0);
886 	if (sc->vmbus_mnf1 == NULL)
887 		return ENOMEM;
888 
889 	sc->vmbus_mnf2 = contigmalloc(sizeof(struct vmbus_mnf), M_DEVBUF,
890 	    M_WAITOK | M_ZERO, 0ul, ~0ul, PAGE_SIZE, 0);
891 	if (sc->vmbus_mnf2 == NULL)
892 		return ENOMEM;
893 
894 	return 0;
895 }
896 
897 static void
898 vmbus_dma_free(struct vmbus_softc *sc)
899 {
900 	int cpu;
901 
902 	if (sc->vmbus_evtflags != NULL) {
903 		contigfree(sc->vmbus_evtflags, PAGE_SIZE, M_DEVBUF);
904 		sc->vmbus_evtflags = NULL;
905 		sc->vmbus_rx_evtflags = NULL;
906 		sc->vmbus_tx_evtflags = NULL;
907 	}
908 	if (sc->vmbus_mnf1 != NULL) {
909 		contigfree(sc->vmbus_mnf1, PAGE_SIZE, M_DEVBUF);
910 		sc->vmbus_mnf1 = NULL;
911 	}
912 	if (sc->vmbus_mnf2 != NULL) {
913 		contigfree(sc->vmbus_mnf2, sizeof(struct vmbus_mnf), M_DEVBUF);
914 		sc->vmbus_mnf2 = NULL;
915 	}
916 
917 	CPU_FOREACH(cpu) {
918 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
919 			contigfree(VMBUS_PCPU_GET(sc, message, cpu), PAGE_SIZE,
920 			    M_DEVBUF);
921 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
922 		}
923 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
924 			contigfree(VMBUS_PCPU_GET(sc, event_flags, cpu),
925 			    PAGE_SIZE, M_DEVBUF);
926 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
927 		}
928 	}
929 }
930 
931 static int
932 vmbus_intr_setup(struct vmbus_softc *sc)
933 {
934 	int cpu;
935 
936 	CPU_FOREACH(cpu) {
937 		char buf[MAXCOMLEN + 1];
938 		cpuset_t cpu_mask;
939 
940 		/* Allocate an interrupt counter for Hyper-V interrupt */
941 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
942 #if !defined(__aarch64__)
943 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
944 #endif /* not for aarch64 */
945 		/*
946 		 * Setup taskqueue to handle events.  Task will be per-
947 		 * channel.
948 		 */
949 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
950 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
951 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
952 		if (vmbus_pin_evttask) {
953 			CPU_SETOF(cpu, &cpu_mask);
954 			taskqueue_start_threads_cpuset(
955 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
956 			    &cpu_mask, "hvevent%d", cpu);
957 		} else {
958 			taskqueue_start_threads(
959 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
960 			    "hvevent%d", cpu);
961 		}
962 
963 		/*
964 		 * Setup tasks and taskqueues to handle messages.
965 		 */
966 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
967 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
968 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
969 		CPU_SETOF(cpu, &cpu_mask);
970 		taskqueue_start_threads_cpuset(
971 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
972 		    "hvmsg%d", cpu);
973 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
974 		    vmbus_msg_task, sc);
975 	}
976 	return (vmbus_setup_intr1(sc));
977 }
/*
 * Undo the interrupt setup; all work is delegated to
 * vmbus_intr_teardown1().
 */
static void
vmbus_intr_teardown(struct vmbus_softc *sc)
{

	vmbus_intr_teardown1(sc);
}
983 
984 static int
985 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
986 {
987 	return (ENOENT);
988 }
989 
990 static int
991 vmbus_child_pnpinfo(device_t dev, device_t child, struct sbuf *sb)
992 {
993 	const struct vmbus_channel *chan;
994 	char guidbuf[HYPERV_GUID_STRLEN];
995 
996 	chan = vmbus_get_channel(child);
997 	if (chan == NULL) {
998 		/* Event timer device, which does not belong to a channel */
999 		return (0);
1000 	}
1001 
1002 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1003 	sbuf_printf(sb, "classid=%s", guidbuf);
1004 
1005 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1006 	sbuf_printf(sb, " deviceid=%s", guidbuf);
1007 
1008 	return (0);
1009 }
1010 
1011 int
1012 vmbus_add_child(struct vmbus_channel *chan)
1013 {
1014 	struct vmbus_softc *sc = chan->ch_vmbus;
1015 	device_t parent = sc->vmbus_dev;
1016 
1017 	bus_topo_lock();
1018 	chan->ch_dev = device_add_child(parent, NULL, -1);
1019 	if (chan->ch_dev == NULL) {
1020 		bus_topo_unlock();
1021 		device_printf(parent, "device_add_child for chan%u failed\n",
1022 		    chan->ch_id);
1023 		return (ENXIO);
1024 	}
1025 	device_set_ivars(chan->ch_dev, chan);
1026 	device_probe_and_attach(chan->ch_dev);
1027 	bus_topo_unlock();
1028 
1029 	return (0);
1030 }
1031 
1032 int
1033 vmbus_delete_child(struct vmbus_channel *chan)
1034 {
1035 	int error = 0;
1036 
1037 	bus_topo_lock();
1038 	if (chan->ch_dev != NULL) {
1039 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1040 		    chan->ch_dev);
1041 		chan->ch_dev = NULL;
1042 	}
1043 	bus_topo_unlock();
1044 	return (error);
1045 }
1046 
1047 static int
1048 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1049 {
1050 	struct vmbus_softc *sc = arg1;
1051 	char verstr[16];
1052 
1053 	snprintf(verstr, sizeof(verstr), "%u.%u",
1054 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1055 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1056 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1057 }
1058 
1059 /*
1060  * We need the function to make sure the MMIO resource is allocated from the
1061  * ranges found in _CRS.
1062  *
1063  * For the release function, we can use bus_generic_release_resource().
1064  */
1065 static struct resource *
1066 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1067     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1068 {
1069 	device_t parent = device_get_parent(dev);
1070 	struct resource *res;
1071 
1072 #ifdef NEW_PCIB
1073 	if (type == SYS_RES_MEMORY) {
1074 		struct vmbus_softc *sc = device_get_softc(dev);
1075 
1076 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1077 		    rid, start, end, count, flags);
1078 	} else
1079 #endif
1080 	{
1081 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1082 		    end, count, flags);
1083 	}
1084 
1085 	return (res);
1086 }
1087 
1088 static int
1089 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1090 {
1091 
1092 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1093 	    irqs));
1094 }
1095 
1096 static int
1097 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1098 {
1099 
1100 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1101 }
1102 
1103 static int
1104 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1105 {
1106 
1107 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1108 }
1109 
1110 static int
1111 vmbus_release_msix(device_t bus, device_t dev, int irq)
1112 {
1113 
1114 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1115 }
1116 
1117 static int
1118 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1119 	uint32_t *data)
1120 {
1121 
1122 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1123 }
1124 
1125 static uint32_t
1126 vmbus_get_version_method(device_t bus, device_t dev)
1127 {
1128 	struct vmbus_softc *sc = device_get_softc(bus);
1129 
1130 	return sc->vmbus_version;
1131 }
1132 
1133 static int
1134 vmbus_probe_guid_method(device_t bus, device_t dev,
1135     const struct hyperv_guid *guid)
1136 {
1137 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1138 
1139 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1140 		return 0;
1141 	return ENXIO;
1142 }
1143 
1144 static uint32_t
1145 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1146 {
1147 	const struct vmbus_softc *sc = device_get_softc(bus);
1148 
1149 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1150 }
1151 
1152 static struct taskqueue *
1153 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1154 {
1155 	const struct vmbus_softc *sc = device_get_softc(bus);
1156 
1157 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1158 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1159 }
1160 
1161 #ifdef NEW_PCIB
/* Address that _CRS ranges are trimmed at to stay clear of the vTPM. */
#define	VTPM_BASE_ADDR	0xfed40000
/* The 4GB boundary that separates the two _CRS passes. */
#define	FOUR_GB		(1ULL << 32)

/*
 * Selects which ranges a _CRS walk records: those starting at or above
 * 4GB (parse_64) or those starting below it (parse_32).
 */
enum parse_pass {
	parse_64,
	parse_32
};
1166 
1167 struct parse_context {
1168 	device_t vmbus_dev;
1169 	enum parse_pass pass;
1170 };
1171 
1172 static ACPI_STATUS
1173 parse_crs(ACPI_RESOURCE *res, void *ctx)
1174 {
1175 	const struct parse_context *pc = ctx;
1176 	device_t vmbus_dev = pc->vmbus_dev;
1177 
1178 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1179 	UINT64 start, end;
1180 
1181 	switch (res->Type) {
1182 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1183 		start = res->Data.Address32.Address.Minimum;
1184 		end = res->Data.Address32.Address.Maximum;
1185 		break;
1186 
1187 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1188 		start = res->Data.Address64.Address.Minimum;
1189 		end = res->Data.Address64.Address.Maximum;
1190 		break;
1191 
1192 	default:
1193 		/* Unused types. */
1194 		return (AE_OK);
1195 	}
1196 
1197 	/*
1198 	 * We don't use <1MB addresses.
1199 	 */
1200 	if (end < 0x100000)
1201 		return (AE_OK);
1202 
1203 	/* Don't conflict with vTPM. */
1204 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1205 		end = VTPM_BASE_ADDR - 1;
1206 
1207 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1208 	    (pc->pass == parse_64 && start >= FOUR_GB))
1209 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1210 		    start, end, 0);
1211 
1212 	return (AE_OK);
1213 }
1214 
1215 static void
1216 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1217 {
1218 	struct parse_context pc;
1219 	ACPI_STATUS status;
1220 
1221 	if (bootverbose)
1222 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1223 
1224 	pc.vmbus_dev = vmbus_dev;
1225 	pc.pass = pass;
1226 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1227 			parse_crs, &pc);
1228 
1229 	if (bootverbose && ACPI_FAILURE(status))
1230 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1231 }
1232 
1233 static void
1234 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1235 {
1236 	device_t acpi0, parent;
1237 
1238 	parent = device_get_parent(dev);
1239 
1240 	acpi0 = device_get_parent(parent);
1241 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1242 		device_t *children;
1243 		int count;
1244 
1245 		/*
1246 		 * Try to locate VMBUS resources and find _CRS on them.
1247 		 */
1248 		if (device_get_children(acpi0, &children, &count) == 0) {
1249 			int i;
1250 
1251 			for (i = 0; i < count; ++i) {
1252 				if (!device_is_attached(children[i]))
1253 					continue;
1254 
1255 				if (strcmp("vmbus_res",
1256 				    device_get_name(children[i])) == 0)
1257 					vmbus_get_crs(children[i], dev, pass);
1258 			}
1259 			free(children, M_TEMP);
1260 		}
1261 
1262 		/*
1263 		 * Try to find _CRS on acpi.
1264 		 */
1265 		vmbus_get_crs(acpi0, dev, pass);
1266 	} else {
1267 		device_printf(dev, "not grandchild of acpi\n");
1268 	}
1269 
1270 	/*
1271 	 * Try to find _CRS on parent.
1272 	 */
1273 	vmbus_get_crs(parent, dev, pass);
1274 }
1275 
1276 static void
1277 vmbus_get_mmio_res(device_t dev)
1278 {
1279 	struct vmbus_softc *sc = device_get_softc(dev);
1280 	/*
1281 	 * We walk the resources twice to make sure that: in the resource
1282 	 * list, the 32-bit resources appear behind the 64-bit resources.
1283 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1284 	 * iterate through the list to find a range for a 64-bit BAR in
1285 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1286 	 * ranges first.
1287 	 */
1288 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1289 
1290 	vmbus_get_mmio_res_pass(dev, parse_64);
1291 	vmbus_get_mmio_res_pass(dev, parse_32);
1292 }
1293 
/*
 * On Gen2 VMs, Hyper-V provides an MMIO range for the framebuffer, and
 * that range is not usable by other PCI devices.  Currently only the
 * efifb and vbefb drivers use this range, and they do so without
 * reserving it from the system.  The vmbus driver therefore reserves it
 * before any other PCI device driver starts to request MMIO addresses.
 *
 * hv_fb_res holds that reservation; it is NULL while nothing is reserved.
 */
static struct resource *hv_fb_res = NULL;
1303 
1304 static void
1305 vmbus_fb_mmio_res(device_t dev)
1306 {
1307 	struct efi_fb *efifb;
1308 #if !defined(__aarch64__)
1309 	struct vbe_fb *vbefb;
1310 #endif /* aarch64 */
1311 	rman_res_t fb_start, fb_end, fb_count;
1312 	int fb_height, fb_width;
1313 	caddr_t kmdp;
1314 
1315 	struct vmbus_softc *sc = device_get_softc(dev);
1316 	int rid = 0;
1317 
1318 	kmdp = preload_search_by_type("elf kernel");
1319 	if (kmdp == NULL)
1320 		kmdp = preload_search_by_type("elf64 kernel");
1321 	efifb = (struct efi_fb *)preload_search_info(kmdp,
1322 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
1323 #if !defined(__aarch64__)
1324 	vbefb = (struct vbe_fb *)preload_search_info(kmdp,
1325 	    MODINFO_METADATA | MODINFOMD_VBE_FB);
1326 #endif /* aarch64 */
1327 	if (efifb != NULL) {
1328 		fb_start = efifb->fb_addr;
1329 		fb_end = efifb->fb_addr + efifb->fb_size;
1330 		fb_count = efifb->fb_size;
1331 		fb_height = efifb->fb_height;
1332 		fb_width = efifb->fb_width;
1333 	}
1334 #if !defined(__aarch64__)
1335 	else if (vbefb != NULL) {
1336 		fb_start = vbefb->fb_addr;
1337 		fb_end = vbefb->fb_addr + vbefb->fb_size;
1338 		fb_count = vbefb->fb_size;
1339 		fb_height = vbefb->fb_height;
1340 		fb_width = vbefb->fb_width;
1341 	}
1342 #endif /* aarch64 */
1343 	else {
1344 		if (bootverbose)
1345 			device_printf(dev,
1346 			    "no preloaded kernel fb information\n");
1347 		/* We are on Gen1 VM, just return. */
1348 		return;
1349 	}
1350 
1351 	if (bootverbose)
1352 		device_printf(dev,
1353 		    "fb: fb_addr: %#jx, size: %#jx, "
1354 		    "actual size needed: 0x%x\n",
1355 		    fb_start, fb_count, fb_height * fb_width);
1356 
1357 	hv_fb_res = pcib_host_res_alloc(&sc->vmbus_mmio_res, dev,
1358 	    SYS_RES_MEMORY, &rid, fb_start, fb_end, fb_count,
1359 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1360 
1361 	if (hv_fb_res && bootverbose)
1362 		device_printf(dev,
1363 		    "successfully reserved memory for framebuffer "
1364 		    "starting at %#jx, size %#jx\n",
1365 		    fb_start, fb_count);
1366 }
1367 
1368 static void
1369 vmbus_free_mmio_res(device_t dev)
1370 {
1371 	struct vmbus_softc *sc = device_get_softc(dev);
1372 
1373 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1374 
1375 	if (hv_fb_res)
1376 		hv_fb_res = NULL;
1377 }
1378 #endif	/* NEW_PCIB */
1379 
1380 static void
1381 vmbus_identify(driver_t *driver, device_t parent)
1382 {
1383 
1384 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1385 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1386 		return;
1387 	device_add_child(parent, "vmbus", -1);
1388 }
1389 
1390 static int
1391 vmbus_probe(device_t dev)
1392 {
1393 
1394 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1395 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1396 		return (ENXIO);
1397 
1398 	device_set_desc(dev, "Hyper-V Vmbus");
1399 	return (BUS_PROBE_DEFAULT);
1400 }
1401 
1402 #if defined(__x86_64__)
1403 static int
1404 vmbus_alloc_cpu_mem(struct vmbus_softc *sc)
1405 {
1406 	int cpu;
1407 
1408 	CPU_FOREACH(cpu) {
1409 		void **hv_cpu_mem;
1410 
1411 		hv_cpu_mem = VMBUS_PCPU_PTR(sc, cpu_mem, cpu);
1412 		*hv_cpu_mem = contigmalloc(PAGE_SIZE, M_DEVBUF,
1413 		    M_NOWAIT | M_ZERO, 0ul, ~0ul, PAGE_SIZE, 0);
1414 
1415 		if (*hv_cpu_mem == NULL)
1416 			return ENOMEM;
1417 	}
1418 
1419 	return 0;
1420 }
1421 
1422 static void
1423 vmbus_free_cpu_mem(struct vmbus_softc *sc)
1424 {
1425 	int cpu;
1426 
1427 	CPU_FOREACH(cpu) {
1428 		void **hv_cpu_mem;
1429 		hv_cpu_mem = VMBUS_PCPU_PTR(sc, cpu_mem, cpu);
1430 		if(*hv_cpu_mem != NULL) {
1431 			contigfree(*hv_cpu_mem, PAGE_SIZE, M_DEVBUF);
1432 			*hv_cpu_mem = NULL;
1433 		}
1434 	}
1435 }
1436 #endif
1437 
1438 /**
1439  * @brief Main vmbus driver initialization routine.
1440  *
1441  * Here, we
1442  * - initialize the vmbus driver context
1443  * - setup various driver entry points
1444  * - invoke the vmbus hv main init routine
1445  * - get the irq resource
1446  * - invoke the vmbus to add the vmbus root device
1447  * - setup the vmbus root device
1448  * - retrieve the channel offers
1449  */
1450 static int
1451 vmbus_doattach(struct vmbus_softc *sc)
1452 {
1453 	struct sysctl_oid_list *child;
1454 	struct sysctl_ctx_list *ctx;
1455 	int ret;
1456 	device_t dev_res;
1457 	ACPI_HANDLE handle;
1458 	unsigned int coherent = 0;
1459 
1460 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1461 		return (0);
1462 
1463 #ifdef NEW_PCIB
1464 	vmbus_get_mmio_res(sc->vmbus_dev);
1465 	vmbus_fb_mmio_res(sc->vmbus_dev);
1466 #endif
1467 
1468 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1469 
1470 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1471 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1472 	TAILQ_INIT(&sc->vmbus_prichans);
1473 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1474 	TAILQ_INIT(&sc->vmbus_chans);
1475 	sc->vmbus_chmap = malloc(
1476 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1477 	    M_WAITOK | M_ZERO);
1478 
1479 	/* Coherency attribute */
1480 	dev_res =  devclass_get_device(devclass_find("vmbus_res"), 0);
1481 	if (dev_res != NULL) {
1482 		handle = acpi_get_handle(dev_res);
1483 
1484 		if (ACPI_FAILURE(acpi_GetInteger(handle, "_CCA", &coherent)))
1485 			coherent = 0;
1486 	}
1487 	if (bootverbose)
1488 		device_printf(sc->vmbus_dev, "Bus is%s cache-coherent\n",
1489 			coherent ? "" : " not");
1490 
1491 	bus_dma_tag_create(bus_get_dma_tag(sc->vmbus_dev),
1492 		1, 0,
1493 		BUS_SPACE_MAXADDR,
1494 		BUS_SPACE_MAXADDR,
1495 		NULL, NULL,
1496 		BUS_SPACE_MAXSIZE,
1497 		BUS_SPACE_UNRESTRICTED,
1498 		BUS_SPACE_MAXSIZE,
1499 		coherent ? BUS_DMA_COHERENT : 0,
1500 		NULL, NULL,
1501 		&sc->dmat);
1502 	/*
1503 	 * Create context for "post message" Hypercalls
1504 	 */
1505 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1506 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1507 	    sizeof(struct vmbus_msghc));
1508 	if (sc->vmbus_xc == NULL) {
1509 		ret = ENXIO;
1510 		goto cleanup;
1511 	}
1512 
1513 	/*
1514 	 * Allocate DMA stuffs.
1515 	 */
1516 	ret = vmbus_dma_alloc(sc);
1517 	if (ret != 0)
1518 		goto cleanup;
1519 
1520 	/*
1521 	 * Setup interrupt.
1522 	 */
1523 	ret = vmbus_intr_setup(sc);
1524 	if (ret != 0)
1525 		goto cleanup;
1526 
1527 #if defined(__x86_64__)
1528 	/*
1529 	 * Alloc per cpu memory for tlb flush hypercall
1530 	 */
1531 	if (hv_tlb_hcall) {
1532 		ret = vmbus_alloc_cpu_mem(sc);
1533 		if (ret != 0) {
1534 			hv_tlb_hcall = 0;
1535 			if (bootverbose)
1536 				device_printf(sc->vmbus_dev,
1537 				    "cannot alloc contig memory for "
1538 				    "cpu_mem, use system provided "
1539 				    "tlb flush call.\n");
1540 
1541 			vmbus_free_cpu_mem(sc);
1542 		}
1543 	}
1544 #endif
1545 
1546 	/*
1547 	 * Setup SynIC.
1548 	 */
1549 	if (bootverbose)
1550 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1551 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1552 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1553 
1554 #if defined(__x86_64__)
1555 	if (hv_tlb_hcall)
1556 		smp_targeted_tlb_shootdown = &hyperv_vm_tlb_flush;
1557 #endif
1558 
1559 	/*
1560 	 * Initialize vmbus, e.g. connect to Hypervisor.
1561 	 */
1562 	ret = vmbus_init(sc);
1563 	if (ret != 0)
1564 		goto cleanup;
1565 
1566 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1567 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1568 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1569 	else
1570 		sc->vmbus_event_proc = vmbus_event_proc;
1571 
1572 	ret = vmbus_scan(sc);
1573 	if (ret != 0)
1574 		goto cleanup;
1575 
1576 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1577 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1578 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1579 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1580 	    vmbus_sysctl_version, "A", "vmbus version");
1581 
1582 	return (ret);
1583 
1584 cleanup:
1585 	vmbus_scan_teardown(sc);
1586 	vmbus_intr_teardown(sc);
1587 	vmbus_dma_free(sc);
1588 	if (sc->vmbus_xc != NULL) {
1589 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1590 		sc->vmbus_xc = NULL;
1591 	}
1592 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1593 	mtx_destroy(&sc->vmbus_prichan_lock);
1594 	mtx_destroy(&sc->vmbus_chan_lock);
1595 
1596 	return (ret);
1597 }
1598 
1599 static void
1600 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1601 {
1602 }
1603 
1604 #if defined(EARLY_AP_STARTUP)
1605 
1606 static void
1607 vmbus_intrhook(void *xsc)
1608 {
1609 	struct vmbus_softc *sc = xsc;
1610 
1611 	if (bootverbose)
1612 		device_printf(sc->vmbus_dev, "intrhook\n");
1613 	vmbus_doattach(sc);
1614 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1615 }
1616 
1617 #endif /* EARLY_AP_STARTUP */
1618 
1619 static int
1620 vmbus_attach(device_t dev)
1621 {
1622 	vmbus_sc = device_get_softc(dev);
1623 	vmbus_sc->vmbus_dev = dev;
1624 	vmbus_sc->vmbus_idtvec = -1;
1625 
1626 	/*
1627 	 * Event processing logic will be configured:
1628 	 * - After the vmbus protocol version negotiation.
1629 	 * - Before we request channel offers.
1630 	 */
1631 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1632 
1633 #if defined(EARLY_AP_STARTUP)
1634 	/*
1635 	 * Defer the real attach until the pause(9) works as expected.
1636 	 */
1637 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1638 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1639 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1640 #endif /* EARLY_AP_STARTUP  and aarch64 */
1641 
1642 	return (0);
1643 }
1644 
1645 static int
1646 vmbus_detach(device_t dev)
1647 {
1648 	struct vmbus_softc *sc = device_get_softc(dev);
1649 
1650 	bus_generic_detach(dev);
1651 	vmbus_chan_destroy_all(sc);
1652 
1653 	vmbus_scan_teardown(sc);
1654 
1655 	vmbus_disconnect(sc);
1656 
1657 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1658 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1659 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1660 	}
1661 
1662 #if defined(__x86_64__)
1663 	/*
1664 	 * Restore the tlb flush to native call
1665 	 */
1666 	if (hv_tlb_hcall) {
1667 		smp_targeted_tlb_shootdown = &smp_targeted_tlb_shootdown_native;
1668 		vmbus_free_cpu_mem(sc);
1669 	}
1670 #endif
1671 
1672 	vmbus_intr_teardown(sc);
1673 	vmbus_dma_free(sc);
1674 
1675 	if (sc->vmbus_xc != NULL) {
1676 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1677 		sc->vmbus_xc = NULL;
1678 	}
1679 
1680 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1681 	mtx_destroy(&sc->vmbus_prichan_lock);
1682 	mtx_destroy(&sc->vmbus_chan_lock);
1683 
1684 #ifdef NEW_PCIB
1685 	vmbus_free_mmio_res(dev);
1686 #endif
1687 
1688 #if defined(__aarch64__)
1689 	bus_release_resource(device_get_parent(dev), SYS_RES_IRQ, sc->vector,
1690 	    sc->ires);
1691 #endif
1692 	return (0);
1693 }
1694 
1695 #if !defined(EARLY_AP_STARTUP)
1696 
1697 static void
1698 vmbus_sysinit(void *arg __unused)
1699 {
1700 	struct vmbus_softc *sc = vmbus_get_softc();
1701 
1702 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1703 		return;
1704 
1705 	vmbus_doattach(sc);
1706 }
1707 /*
1708  * NOTE:
1709  * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1710  * initialized.
1711  */
1712 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1713 #endif	/* !EARLY_AP_STARTUP */
1714