xref: /freebsd/sys/amd64/vmm/io/ppt.c (revision 788ca347b816afd83b2885e0c79aeeb88649b2ab)
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/resource.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"

#include "iommu.h"
#include "ppt.h"

/* XXX locking */

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate an MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
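
/*
 * Note: with PCIR_MAX_BAR_0 == 5 this works out to seven segments: one for
 * each of the six possible BARs plus one extra for the BAR that is split
 * in two around its MSI-X table.
 */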

MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");

struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

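/*
 * Per-device passthrough state.  The MSI bookkeeping uses fixed-size arrays
 * (up to MAX_MSIMSGS messages) while the MSI-X arrays are allocated on first
 * use, sized by pci_msix_count().
 */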
struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	TAILQ_ENTRY(pptdev)	next;
	struct vm_memory_segment mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */

		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		struct resource *msix_table_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
};

SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");

static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
    "number of pci passthru devices");

static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);

static int
ppt_probe(device_t dev)
{
	int bus, slot, func;
	struct pci_devinfo *dinfo;

	dinfo = (struct pci_devinfo *)device_get_ivars(dev);

	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);

	/*
	 * To qualify for PCI passthrough a device must:
	 * - be allowed by the administrator to be used in this role
	 * - be an endpoint device
	 */
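	/*
	 * Whether the administrator has allowed the device is reported by
	 * vmm_is_pptdev(); this is typically driven by the "pptdevs"
	 * loader tunable.
	 */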
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return (ENXIO);
	else if (vmm_is_pptdev(bus, slot, func))
		return (0);
	else
		/*
		 * Returning BUS_PROBE_NOWILDCARD here matches devices for
		 * which the SR-IOV infrastructure specified "ppt" as the
		 * passthrough driver.  Devices that did not have "ppt"
		 * specified as their driver are not matched by this.
		 */
		return (BUS_PROBE_NOWILDCARD);
}

static int
ppt_attach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	num_pptdevs++;
	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
	ppt->dev = dev;

	if (bootverbose)
		device_printf(dev, "attached\n");

	return (0);
}

static int
ppt_detach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	if (ppt->vm != NULL)
		return (EBUSY);
	num_pptdevs--;
	TAILQ_REMOVE(&pptdev_list, ppt, next);

	return (0);
}

static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);

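/*
 * Look up an attached passthrough device by its PCI bus/slot/function.
 */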
static struct pptdev *
ppt_find(int bus, int slot, int func)
{
	device_t dev;
	struct pptdev *ppt;
	int b, s, f;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		dev = ppt->dev;
		b = pci_get_bus(dev);
		s = pci_get_slot(dev);
		f = pci_get_function(dev);
		if (bus == b && slot == s && func == f)
			return (ppt);
	}
	return (NULL);
}

static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct vm_memory_segment *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof(struct vm_memory_segment));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i, rid;
	void *cookie;
	struct resource *res;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		rid = ppt->msi.startrid + i;
		res = ppt->msi.res[i];
		cookie = ppt->msi.cookie[i];

		if (cookie != NULL)
			bus_teardown_intr(ppt->dev, res, cookie);

		if (res != NULL)
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

		ppt->msi.res[i] = NULL;
		ppt->msi.cookie[i] = NULL;
	}

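	/*
	 * MSI messages were allocated from the PCI bus only when startrid
	 * is 1; a startrid of 0 means the legacy INTx interrupt was used,
	 * so there is no MSI allocation to release.
	 */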
	if (ppt->msi.startrid == 1)
		pci_release_msi(ppt->dev);

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	int rid;
	struct resource *res;
	void *cookie;

	rid = ppt->msix.startrid + idx;
	res = ppt->msix.res[idx];
	cookie = ppt->msix.cookie[idx];

	if (cookie != NULL)
		bus_teardown_intr(ppt->dev, res, cookie);

	if (res != NULL)
		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

	ppt->msix.res[idx] = NULL;
	ppt->msix.cookie[idx] = NULL;
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	int i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.msix_table_res) {
		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
				     ppt->msix.msix_table_rid,
				     ppt->msix.msix_table_res);
		ppt->msix.msix_table_res = NULL;
		ppt->msix.msix_table_rid = 0;
	}

	free(ppt->msix.res, M_PPTMSIX);
	free(ppt->msix.cookie, M_PPTMSIX);
	free(ppt->msix.arg, M_PPTMSIX);

	pci_release_msi(ppt->dev);

	ppt->msix.num_msgs = 0;
}

int
ppt_avail_devices(void)
{

	return (num_pptdevs);
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	int num;

	num = 0;
	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm)
			num++;
	}
	return (num);
}

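/*
 * Returns TRUE if 'gpa' falls within an MMIO segment that has been mapped
 * into 'vm' on behalf of a passthrough device.
 */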
boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	struct pptdev *ppt;
	struct vm_memory_segment *seg;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm != vm)
			continue;

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
				return (TRUE);
		}
	}

	return (FALSE);
}

int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is owned by a different VM then we
		 * cannot change its owner.
		 */
		if (ppt->vm != NULL && ppt->vm != vm)
			return (EBUSY);

		ppt->vm = vm;
		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
		return (0);
	}
	return (ENOENT);
}

int
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is not owned by this 'vm' then bail out.
		 */
		if (ppt->vm != vm)
			return (EBUSY);
		ppt_unmap_mmio(vm, ppt);
		ppt_teardown_msi(ppt);
		ppt_teardown_msix(ppt);
		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
		ppt->vm = NULL;
		return (0);
	}
	return (ENOENT);
}

int
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;
	int bus, slot, func;
	device_t dev;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm) {
			dev = ppt->dev;
			bus = pci_get_bus(dev);
			slot = pci_get_slot(dev);
			func = pci_get_function(dev);
			vm_unassign_pptdev(vm, bus, slot, func);
		}
	}

	return (0);
}

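/*
 * Map a segment of a passthrough device BAR at host physical address 'hpa'
 * into the guest physical address space at 'gpa'.  The mapping is recorded
 * in a free mmio[] slot so that it can be found by ppt_is_mmio() and undone
 * by ppt_unmap_mmio().
 */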
int
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	int i, error;
	struct vm_memory_segment *seg;
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		if (ppt->vm != vm)
			return (EBUSY);

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			if (seg->len == 0) {
				error = vm_map_mmio(vm, gpa, len, hpa);
				if (error == 0) {
					seg->gpa = gpa;
					seg->len = len;
				}
				return (error);
			}
		}
		return (ENOSPC);
	}
	return (ENOENT);
}

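/*
 * Interrupt filter for a passthrough device: forward the host interrupt
 * into the guest by injecting the corresponding MSI message through the
 * guest's local APIC.
 */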
static int
pptintr(void *arg)
{
	struct pptdev *ppt;
	struct pptintr_arg *pptarg;

	pptarg = arg;
	ppt = pptarg->pptdev;

	if (ppt->vm != NULL)
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts, give other filters a chance in case the
	 * shared interrupt was not generated by the passthrough device.
	 */
	if (ppt->msi.startrid == 0)
		return (FILTER_STRAY);
	else
		return (FILTER_HANDLED);
}

int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      uint64_t addr, uint64_t msg, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

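	/*
	 * Resource ID 0 is the device's legacy INTx interrupt, which may be
	 * shared with other devices; resource IDs 1 and up are the MSI
	 * messages allocated below.
	 */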
	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.startrid = startrid;

	/*
	 * For each vector, allocate the IRQ resource and attach it to the
	 * interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	if (i < numvec) {
		ppt_teardown_msi(ppt);
		return (ENXIO);
	}

	return (0);
}

int
ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	struct pci_devinfo *dinfo;
	int numvec, alloced, rid, error;
	size_t res_size, cookie_size, arg_size;

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	dinfo = device_get_ivars(ppt->dev);
	if (!dinfo)
		return (ENXIO);

	/*
	 * First-time configuration:
	 *	Allocate the per-vector bookkeeping arrays in ppt->msix
	 *	Allocate the MSI-X table BAR resource
	 *	Allocate the MSI-X vectors from the PCI bus
	 */
	if (ppt->msix.num_msgs == 0) {
		numvec = pci_msix_count(ppt->dev);
		if (numvec <= 0)
			return (EINVAL);

		ppt->msix.startrid = 1;
		ppt->msix.num_msgs = numvec;

		res_size = numvec * sizeof(ppt->msix.res[0]);
		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
		arg_size = numvec * sizeof(ppt->msix.arg[0]);

		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
					  M_WAITOK | M_ZERO);
		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);

		rid = dinfo->cfg.msix.msix_table_bar;
		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
					       SYS_RES_MEMORY, &rid, RF_ACTIVE);

		if (ppt->msix.msix_table_res == NULL) {
			ppt_teardown_msix(ppt);
			return (ENOSPC);
		}
		ppt->msix.msix_table_rid = rid;

		alloced = numvec;
		error = pci_alloc_msix(ppt->dev, &alloced);
		if (error || alloced != numvec) {
			ppt_teardown_msix(ppt);
			return (error == 0 ? ENOSPC : error);
		}
	}

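	/*
	 * The vector is not masked (PCIM_MSIX_VCTRL_MASK is clear), so wire
	 * up a host interrupt handler for it; otherwise tear down any
	 * handler that may already be installed.
	 */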
	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		/* Allocate the IRQ resource */
		ppt->msix.cookie[idx] = NULL;
		rid = ppt->msix.startrid + idx;
		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							    &rid, RF_ACTIVE);
		if (ppt->msix.res[idx] == NULL)
			return (ENXIO);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Set up the MSI-X interrupt */
		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msix.arg[idx],
				       &ppt->msix.cookie[idx]);

		if (error != 0) {
			/*
			 * bus_setup_intr() failed, so no handler was
			 * installed; only the IRQ resource needs to be
			 * released.
			 */
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid,
					     ppt->msix.res[idx]);
			ppt->msix.res[idx] = NULL;
			return (ENXIO);
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

	return (0);
}