xref: /freebsd/sys/amd64/vmm/io/ppt.c (revision bc96366c864c07ef352edb92017357917c75b36c)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/pciio.h>
39 #include <sys/rman.h>
40 #include <sys/smp.h>
41 #include <sys/sysctl.h>
42 
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
45 
46 #include <machine/resource.h>
47 
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
50 
51 #include "vmm_lapic.h"
52 #include "vmm_ktr.h"
53 
54 #include "iommu.h"
55 #include "ppt.h"
56 
57 /* XXX locking */
58 
59 #define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
60 #define	MAX_MSIMSGS	32
61 
62 /*
63  * If the MSI-X table is located in the middle of a BAR then that MMIO
64  * region gets split into two segments - one segment above the MSI-X table
65  * and the other segment below the MSI-X table - with a hole in place of
66  * the MSI-X table so accesses to it can be trapped and emulated.
67  *
68  * So, allocate a MMIO segment for each BAR register + 1 additional segment.
69  */
70 #define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
71 
72 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
73 
/*
 * Per-vector argument registered with bus_setup_intr() and handed back to
 * pptintr() when the vector fires.
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* device the vector belongs to */
	uint64_t	addr;		/* forwarded to lapic_intr_msi() */
	uint64_t	msg_data;	/* forwarded to lapic_intr_msi() */
};
79 
80 static struct pptdev {
81 	device_t	dev;
82 	struct vm	*vm;			/* owner of this device */
83 	struct vm_memory_segment mmio[MAX_MMIOSEGS];
84 	struct {
85 		int	num_msgs;		/* guest state */
86 
87 		int	startrid;		/* host state */
88 		struct resource *res[MAX_MSIMSGS];
89 		void	*cookie[MAX_MSIMSGS];
90 		struct pptintr_arg arg[MAX_MSIMSGS];
91 	} msi;
92 
93 	struct {
94 		int num_msgs;
95 		int startrid;
96 		int msix_table_rid;
97 		struct resource *msix_table_res;
98 		struct resource **res;
99 		void **cookie;
100 		struct pptintr_arg *arg;
101 	} msix;
102 } pptdevs[64];
103 
104 SYSCTL_DECL(_hw_vmm);
105 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
106 
107 static int num_pptdevs;
108 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
109     "number of pci passthru devices");
110 
111 static int
112 ppt_probe(device_t dev)
113 {
114 	int bus, slot, func;
115 	struct pci_devinfo *dinfo;
116 
117 	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
118 
119 	bus = pci_get_bus(dev);
120 	slot = pci_get_slot(dev);
121 	func = pci_get_function(dev);
122 
123 	/*
124 	 * To qualify as a pci passthrough device a device must:
125 	 * - be allowed by administrator to be used in this role
126 	 * - be an endpoint device
127 	 */
128 	if (vmm_is_pptdev(bus, slot, func) &&
129 	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
130 		return (0);
131 	else
132 		return (ENXIO);
133 }
134 
135 static int
136 ppt_attach(device_t dev)
137 {
138 	int n;
139 
140 	if (num_pptdevs >= MAX_PPTDEVS) {
141 		printf("ppt_attach: maximum number of pci passthrough devices "
142 		       "exceeded\n");
143 		return (ENXIO);
144 	}
145 
146 	n = num_pptdevs++;
147 	pptdevs[n].dev = dev;
148 
149 	if (bootverbose)
150 		device_printf(dev, "attached\n");
151 
152 	return (0);
153 }
154 
155 static int
156 ppt_detach(device_t dev)
157 {
158 	/*
159 	 * XXX check whether there are any pci passthrough devices assigned
160 	 * to guests before we allow this driver to detach.
161 	 */
162 
163 	return (0);
164 }
165 
166 static device_method_t ppt_methods[] = {
167 	/* Device interface */
168 	DEVMETHOD(device_probe,		ppt_probe),
169 	DEVMETHOD(device_attach,	ppt_attach),
170 	DEVMETHOD(device_detach,	ppt_detach),
171 	{0, 0}
172 };
173 
174 static devclass_t ppt_devclass;
175 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
176 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
177 
178 static struct pptdev *
179 ppt_find(int bus, int slot, int func)
180 {
181 	device_t dev;
182 	int i, b, s, f;
183 
184 	for (i = 0; i < num_pptdevs; i++) {
185 		dev = pptdevs[i].dev;
186 		b = pci_get_bus(dev);
187 		s = pci_get_slot(dev);
188 		f = pci_get_function(dev);
189 		if (bus == b && slot == s && func == f)
190 			return (&pptdevs[i]);
191 	}
192 	return (NULL);
193 }
194 
195 static void
196 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
197 {
198 	int i;
199 	struct vm_memory_segment *seg;
200 
201 	for (i = 0; i < MAX_MMIOSEGS; i++) {
202 		seg = &ppt->mmio[i];
203 		if (seg->len == 0)
204 			continue;
205 		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
206 		bzero(seg, sizeof(struct vm_memory_segment));
207 	}
208 }
209 
210 static void
211 ppt_teardown_msi(struct pptdev *ppt)
212 {
213 	int i, rid;
214 	void *cookie;
215 	struct resource *res;
216 
217 	if (ppt->msi.num_msgs == 0)
218 		return;
219 
220 	for (i = 0; i < ppt->msi.num_msgs; i++) {
221 		rid = ppt->msi.startrid + i;
222 		res = ppt->msi.res[i];
223 		cookie = ppt->msi.cookie[i];
224 
225 		if (cookie != NULL)
226 			bus_teardown_intr(ppt->dev, res, cookie);
227 
228 		if (res != NULL)
229 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
230 
231 		ppt->msi.res[i] = NULL;
232 		ppt->msi.cookie[i] = NULL;
233 	}
234 
235 	if (ppt->msi.startrid == 1)
236 		pci_release_msi(ppt->dev);
237 
238 	ppt->msi.num_msgs = 0;
239 }
240 
241 static void
242 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
243 {
244 	int rid;
245 	struct resource *res;
246 	void *cookie;
247 
248 	rid = ppt->msix.startrid + idx;
249 	res = ppt->msix.res[idx];
250 	cookie = ppt->msix.cookie[idx];
251 
252 	if (cookie != NULL)
253 		bus_teardown_intr(ppt->dev, res, cookie);
254 
255 	if (res != NULL)
256 		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
257 
258 	ppt->msix.res[idx] = NULL;
259 	ppt->msix.cookie[idx] = NULL;
260 }
261 
262 static void
263 ppt_teardown_msix(struct pptdev *ppt)
264 {
265 	int i;
266 
267 	if (ppt->msix.num_msgs == 0)
268 		return;
269 
270 	for (i = 0; i < ppt->msix.num_msgs; i++)
271 		ppt_teardown_msix_intr(ppt, i);
272 
273 	if (ppt->msix.msix_table_res) {
274 		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
275 				     ppt->msix.msix_table_rid,
276 				     ppt->msix.msix_table_res);
277 		ppt->msix.msix_table_res = NULL;
278 		ppt->msix.msix_table_rid = 0;
279 	}
280 
281 	free(ppt->msix.res, M_PPTMSIX);
282 	free(ppt->msix.cookie, M_PPTMSIX);
283 	free(ppt->msix.arg, M_PPTMSIX);
284 
285 	pci_release_msi(ppt->dev);
286 
287 	ppt->msix.num_msgs = 0;
288 }
289 
290 int
291 ppt_avail_devices(void)
292 {
293 
294 	return (num_pptdevs);
295 }
296 
297 int
298 ppt_assigned_devices(struct vm *vm)
299 {
300 	int i, num;
301 
302 	num = 0;
303 	for (i = 0; i < num_pptdevs; i++) {
304 		if (pptdevs[i].vm == vm)
305 			num++;
306 	}
307 	return (num);
308 }
309 
310 boolean_t
311 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
312 {
313 	int i, n;
314 	struct pptdev *ppt;
315 	struct vm_memory_segment *seg;
316 
317 	for (n = 0; n < num_pptdevs; n++) {
318 		ppt = &pptdevs[n];
319 		if (ppt->vm != vm)
320 			continue;
321 
322 		for (i = 0; i < MAX_MMIOSEGS; i++) {
323 			seg = &ppt->mmio[i];
324 			if (seg->len == 0)
325 				continue;
326 			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
327 				return (TRUE);
328 		}
329 	}
330 
331 	return (FALSE);
332 }
333 
334 int
335 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
336 {
337 	struct pptdev *ppt;
338 
339 	ppt = ppt_find(bus, slot, func);
340 	if (ppt != NULL) {
341 		/*
342 		 * If this device is owned by a different VM then we
343 		 * cannot change its owner.
344 		 */
345 		if (ppt->vm != NULL && ppt->vm != vm)
346 			return (EBUSY);
347 
348 		ppt->vm = vm;
349 		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
350 		return (0);
351 	}
352 	return (ENOENT);
353 }
354 
355 int
356 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
357 {
358 	struct pptdev *ppt;
359 
360 	ppt = ppt_find(bus, slot, func);
361 	if (ppt != NULL) {
362 		/*
363 		 * If this device is not owned by this 'vm' then bail out.
364 		 */
365 		if (ppt->vm != vm)
366 			return (EBUSY);
367 		ppt_unmap_mmio(vm, ppt);
368 		ppt_teardown_msi(ppt);
369 		ppt_teardown_msix(ppt);
370 		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
371 		ppt->vm = NULL;
372 		return (0);
373 	}
374 	return (ENOENT);
375 }
376 
377 int
378 ppt_unassign_all(struct vm *vm)
379 {
380 	int i, bus, slot, func;
381 	device_t dev;
382 
383 	for (i = 0; i < num_pptdevs; i++) {
384 		if (pptdevs[i].vm == vm) {
385 			dev = pptdevs[i].dev;
386 			bus = pci_get_bus(dev);
387 			slot = pci_get_slot(dev);
388 			func = pci_get_function(dev);
389 			vm_unassign_pptdev(vm, bus, slot, func);
390 		}
391 	}
392 
393 	return (0);
394 }
395 
396 int
397 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
398 	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
399 {
400 	int i, error;
401 	struct vm_memory_segment *seg;
402 	struct pptdev *ppt;
403 
404 	ppt = ppt_find(bus, slot, func);
405 	if (ppt != NULL) {
406 		if (ppt->vm != vm)
407 			return (EBUSY);
408 
409 		for (i = 0; i < MAX_MMIOSEGS; i++) {
410 			seg = &ppt->mmio[i];
411 			if (seg->len == 0) {
412 				error = vm_map_mmio(vm, gpa, len, hpa);
413 				if (error == 0) {
414 					seg->gpa = gpa;
415 					seg->len = len;
416 				}
417 				return (error);
418 			}
419 		}
420 		return (ENOSPC);
421 	}
422 	return (ENOENT);
423 }
424 
425 static int
426 pptintr(void *arg)
427 {
428 	struct pptdev *ppt;
429 	struct pptintr_arg *pptarg;
430 
431 	pptarg = arg;
432 	ppt = pptarg->pptdev;
433 
434 	if (ppt->vm != NULL)
435 		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
436 	else {
437 		/*
438 		 * XXX
439 		 * This is not expected to happen - panic?
440 		 */
441 	}
442 
443 	/*
444 	 * For legacy interrupts give other filters a chance in case
445 	 * the interrupt was not generated by the passthrough device.
446 	 */
447 	if (ppt->msi.startrid == 0)
448 		return (FILTER_STRAY);
449 	else
450 		return (FILTER_HANDLED);
451 }
452 
453 int
454 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
455 	      uint64_t addr, uint64_t msg, int numvec)
456 {
457 	int i, rid, flags;
458 	int msi_count, startrid, error, tmp;
459 	struct pptdev *ppt;
460 
461 	if (numvec < 0 || numvec > MAX_MSIMSGS)
462 		return (EINVAL);
463 
464 	ppt = ppt_find(bus, slot, func);
465 	if (ppt == NULL)
466 		return (ENOENT);
467 	if (ppt->vm != vm)		/* Make sure we own this device */
468 		return (EBUSY);
469 
470 	/* Free any allocated resources */
471 	ppt_teardown_msi(ppt);
472 
473 	if (numvec == 0)		/* nothing more to do */
474 		return (0);
475 
476 	flags = RF_ACTIVE;
477 	msi_count = pci_msi_count(ppt->dev);
478 	if (msi_count == 0) {
479 		startrid = 0;		/* legacy interrupt */
480 		msi_count = 1;
481 		flags |= RF_SHAREABLE;
482 	} else
483 		startrid = 1;		/* MSI */
484 
485 	/*
486 	 * The device must be capable of supporting the number of vectors
487 	 * the guest wants to allocate.
488 	 */
489 	if (numvec > msi_count)
490 		return (EINVAL);
491 
492 	/*
493 	 * Make sure that we can allocate all the MSI vectors that are needed
494 	 * by the guest.
495 	 */
496 	if (startrid == 1) {
497 		tmp = numvec;
498 		error = pci_alloc_msi(ppt->dev, &tmp);
499 		if (error)
500 			return (error);
501 		else if (tmp != numvec) {
502 			pci_release_msi(ppt->dev);
503 			return (ENOSPC);
504 		} else {
505 			/* success */
506 		}
507 	}
508 
509 	ppt->msi.startrid = startrid;
510 
511 	/*
512 	 * Allocate the irq resource and attach it to the interrupt handler.
513 	 */
514 	for (i = 0; i < numvec; i++) {
515 		ppt->msi.num_msgs = i + 1;
516 		ppt->msi.cookie[i] = NULL;
517 
518 		rid = startrid + i;
519 		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
520 							 &rid, flags);
521 		if (ppt->msi.res[i] == NULL)
522 			break;
523 
524 		ppt->msi.arg[i].pptdev = ppt;
525 		ppt->msi.arg[i].addr = addr;
526 		ppt->msi.arg[i].msg_data = msg + i;
527 
528 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
529 				       INTR_TYPE_NET | INTR_MPSAFE,
530 				       pptintr, NULL, &ppt->msi.arg[i],
531 				       &ppt->msi.cookie[i]);
532 		if (error != 0)
533 			break;
534 	}
535 
536 	if (i < numvec) {
537 		ppt_teardown_msi(ppt);
538 		return (ENXIO);
539 	}
540 
541 	return (0);
542 }
543 
544 int
545 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
546 	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
547 {
548 	struct pptdev *ppt;
549 	struct pci_devinfo *dinfo;
550 	int numvec, alloced, rid, error;
551 	size_t res_size, cookie_size, arg_size;
552 
553 	ppt = ppt_find(bus, slot, func);
554 	if (ppt == NULL)
555 		return (ENOENT);
556 	if (ppt->vm != vm)		/* Make sure we own this device */
557 		return (EBUSY);
558 
559 	dinfo = device_get_ivars(ppt->dev);
560 	if (!dinfo)
561 		return (ENXIO);
562 
563 	/*
564 	 * First-time configuration:
565 	 * 	Allocate the MSI-X table
566 	 *	Allocate the IRQ resources
567 	 *	Set up some variables in ppt->msix
568 	 */
569 	if (ppt->msix.num_msgs == 0) {
570 		numvec = pci_msix_count(ppt->dev);
571 		if (numvec <= 0)
572 			return (EINVAL);
573 
574 		ppt->msix.startrid = 1;
575 		ppt->msix.num_msgs = numvec;
576 
577 		res_size = numvec * sizeof(ppt->msix.res[0]);
578 		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
579 		arg_size = numvec * sizeof(ppt->msix.arg[0]);
580 
581 		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
582 		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
583 					  M_WAITOK | M_ZERO);
584 		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
585 
586 		rid = dinfo->cfg.msix.msix_table_bar;
587 		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
588 					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
589 
590 		if (ppt->msix.msix_table_res == NULL) {
591 			ppt_teardown_msix(ppt);
592 			return (ENOSPC);
593 		}
594 		ppt->msix.msix_table_rid = rid;
595 
596 		alloced = numvec;
597 		error = pci_alloc_msix(ppt->dev, &alloced);
598 		if (error || alloced != numvec) {
599 			ppt_teardown_msix(ppt);
600 			return (error == 0 ? ENOSPC: error);
601 		}
602 	}
603 
604 	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
605 		/* Tear down the IRQ if it's already set up */
606 		ppt_teardown_msix_intr(ppt, idx);
607 
608 		/* Allocate the IRQ resource */
609 		ppt->msix.cookie[idx] = NULL;
610 		rid = ppt->msix.startrid + idx;
611 		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
612 							    &rid, RF_ACTIVE);
613 		if (ppt->msix.res[idx] == NULL)
614 			return (ENXIO);
615 
616 		ppt->msix.arg[idx].pptdev = ppt;
617 		ppt->msix.arg[idx].addr = addr;
618 		ppt->msix.arg[idx].msg_data = msg;
619 
620 		/* Setup the MSI-X interrupt */
621 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
622 				       INTR_TYPE_NET | INTR_MPSAFE,
623 				       pptintr, NULL, &ppt->msix.arg[idx],
624 				       &ppt->msix.cookie[idx]);
625 
626 		if (error != 0) {
627 			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
628 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
629 			ppt->msix.cookie[idx] = NULL;
630 			ppt->msix.res[idx] = NULL;
631 			return (ENXIO);
632 		}
633 	} else {
634 		/* Masked, tear it down if it's already been set up */
635 		ppt_teardown_msix_intr(ppt, idx);
636 	}
637 
638 	return (0);
639 }
640