xref: /freebsd/sys/amd64/vmm/io/ppt.c (revision a18eacbefdfa1085ca3db829e86ece78cd416493)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/pciio.h>
39 #include <sys/rman.h>
40 #include <sys/smp.h>
41 
42 #include <dev/pci/pcivar.h>
43 #include <dev/pci/pcireg.h>
44 
45 #include <machine/resource.h>
46 
47 #include <machine/vmm.h>
48 #include <machine/vmm_dev.h>
49 
50 #include "vmm_lapic.h"
51 #include "vmm_ktr.h"
52 
53 #include "iommu.h"
54 #include "ppt.h"
55 
/*
 * XXX locking
 * No lock is taken anywhere in the code visible in this file; access to
 * pptdevs[] and num_pptdevs is not serialized here.
 */

/* Number of slots in the statically sized pptdevs[] table. */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
/* Size of the fixed per-device msi.res[], msi.cookie[] and msi.arg[] arrays. */
#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

/* Allocation type used for the dynamically sized msix.res/cookie/arg arrays. */
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72 
/*
 * Per-vector argument registered with bus_setup_intr() and received by
 * pptintr() when the host interrupt fires.
 */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;		/* device the vector belongs to */
	int		vec;			/* vector given to lapic_set_intr() */
	int 		vcpu;			/* vcpu given to lapic_set_intr() */
};
78 
79 static struct pptdev {
80 	device_t	dev;
81 	struct vm	*vm;			/* owner of this device */
82 	struct vm_memory_segment mmio[MAX_MMIOSEGS];
83 	struct {
84 		int	num_msgs;		/* guest state */
85 
86 		int	startrid;		/* host state */
87 		struct resource *res[MAX_MSIMSGS];
88 		void	*cookie[MAX_MSIMSGS];
89 		struct pptintr_arg arg[MAX_MSIMSGS];
90 	} msi;
91 
92 	struct {
93 		int num_msgs;
94 		int startrid;
95 		int msix_table_rid;
96 		struct resource *msix_table_res;
97 		struct resource **res;
98 		void **cookie;
99 		struct pptintr_arg *arg;
100 	} msix;
101 } pptdevs[64];
102 
103 static int num_pptdevs;
104 
105 static int
106 ppt_probe(device_t dev)
107 {
108 	int bus, slot, func;
109 	struct pci_devinfo *dinfo;
110 
111 	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
112 
113 	bus = pci_get_bus(dev);
114 	slot = pci_get_slot(dev);
115 	func = pci_get_function(dev);
116 
117 	/*
118 	 * To qualify as a pci passthrough device a device must:
119 	 * - be allowed by administrator to be used in this role
120 	 * - be an endpoint device
121 	 */
122 	if (vmm_is_pptdev(bus, slot, func) &&
123 	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
124 		return (0);
125 	else
126 		return (ENXIO);
127 }
128 
129 static int
130 ppt_attach(device_t dev)
131 {
132 	int n;
133 
134 	if (num_pptdevs >= MAX_PPTDEVS) {
135 		printf("ppt_attach: maximum number of pci passthrough devices "
136 		       "exceeded\n");
137 		return (ENXIO);
138 	}
139 
140 	n = num_pptdevs++;
141 	pptdevs[n].dev = dev;
142 
143 	if (bootverbose)
144 		device_printf(dev, "attached\n");
145 
146 	return (0);
147 }
148 
149 static int
150 ppt_detach(device_t dev)
151 {
152 	/*
153 	 * XXX check whether there are any pci passthrough devices assigned
154 	 * to guests before we allow this driver to detach.
155 	 */
156 
157 	return (0);
158 }
159 
/* Method table wiring the probe/attach/detach entry points of this driver. */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}				/* end-of-table marker */
};

static devclass_t ppt_devclass;
/* Defines 'ppt_driver'; the trailing 0 is presumably the softc size. */
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
/* Registers the driver under the "pci" bus; no event handler is supplied. */
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
171 
172 static struct pptdev *
173 ppt_find(int bus, int slot, int func)
174 {
175 	device_t dev;
176 	int i, b, s, f;
177 
178 	for (i = 0; i < num_pptdevs; i++) {
179 		dev = pptdevs[i].dev;
180 		b = pci_get_bus(dev);
181 		s = pci_get_slot(dev);
182 		f = pci_get_function(dev);
183 		if (bus == b && slot == s && func == f)
184 			return (&pptdevs[i]);
185 	}
186 	return (NULL);
187 }
188 
189 static void
190 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
191 {
192 	int i;
193 	struct vm_memory_segment *seg;
194 
195 	for (i = 0; i < MAX_MMIOSEGS; i++) {
196 		seg = &ppt->mmio[i];
197 		if (seg->len == 0)
198 			continue;
199 		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
200 		bzero(seg, sizeof(struct vm_memory_segment));
201 	}
202 }
203 
204 static void
205 ppt_teardown_msi(struct pptdev *ppt)
206 {
207 	int i, rid;
208 	void *cookie;
209 	struct resource *res;
210 
211 	if (ppt->msi.num_msgs == 0)
212 		return;
213 
214 	for (i = 0; i < ppt->msi.num_msgs; i++) {
215 		rid = ppt->msi.startrid + i;
216 		res = ppt->msi.res[i];
217 		cookie = ppt->msi.cookie[i];
218 
219 		if (cookie != NULL)
220 			bus_teardown_intr(ppt->dev, res, cookie);
221 
222 		if (res != NULL)
223 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
224 
225 		ppt->msi.res[i] = NULL;
226 		ppt->msi.cookie[i] = NULL;
227 	}
228 
229 	if (ppt->msi.startrid == 1)
230 		pci_release_msi(ppt->dev);
231 
232 	ppt->msi.num_msgs = 0;
233 }
234 
235 static void
236 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
237 {
238 	int rid;
239 	struct resource *res;
240 	void *cookie;
241 
242 	rid = ppt->msix.startrid + idx;
243 	res = ppt->msix.res[idx];
244 	cookie = ppt->msix.cookie[idx];
245 
246 	if (cookie != NULL)
247 		bus_teardown_intr(ppt->dev, res, cookie);
248 
249 	if (res != NULL)
250 		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
251 
252 	ppt->msix.res[idx] = NULL;
253 	ppt->msix.cookie[idx] = NULL;
254 }
255 
256 static void
257 ppt_teardown_msix(struct pptdev *ppt)
258 {
259 	int i;
260 
261 	if (ppt->msix.num_msgs == 0)
262 		return;
263 
264 	for (i = 0; i < ppt->msix.num_msgs; i++)
265 		ppt_teardown_msix_intr(ppt, i);
266 
267 	if (ppt->msix.msix_table_res) {
268 		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
269 				     ppt->msix.msix_table_rid,
270 				     ppt->msix.msix_table_res);
271 		ppt->msix.msix_table_res = NULL;
272 		ppt->msix.msix_table_rid = 0;
273 	}
274 
275 	free(ppt->msix.res, M_PPTMSIX);
276 	free(ppt->msix.cookie, M_PPTMSIX);
277 	free(ppt->msix.arg, M_PPTMSIX);
278 
279 	pci_release_msi(ppt->dev);
280 
281 	ppt->msix.num_msgs = 0;
282 }
283 
284 int
285 ppt_num_devices(struct vm *vm)
286 {
287 	int i, num;
288 
289 	num = 0;
290 	for (i = 0; i < num_pptdevs; i++) {
291 		if (pptdevs[i].vm == vm)
292 			num++;
293 	}
294 	return (num);
295 }
296 
297 boolean_t
298 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
299 {
300 	int i, n;
301 	struct pptdev *ppt;
302 	struct vm_memory_segment *seg;
303 
304 	for (n = 0; n < num_pptdevs; n++) {
305 		ppt = &pptdevs[n];
306 		if (ppt->vm != vm)
307 			continue;
308 
309 		for (i = 0; i < MAX_MMIOSEGS; i++) {
310 			seg = &ppt->mmio[i];
311 			if (seg->len == 0)
312 				continue;
313 			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
314 				return (TRUE);
315 		}
316 	}
317 
318 	return (FALSE);
319 }
320 
321 int
322 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
323 {
324 	struct pptdev *ppt;
325 
326 	ppt = ppt_find(bus, slot, func);
327 	if (ppt != NULL) {
328 		/*
329 		 * If this device is owned by a different VM then we
330 		 * cannot change its owner.
331 		 */
332 		if (ppt->vm != NULL && ppt->vm != vm)
333 			return (EBUSY);
334 
335 		ppt->vm = vm;
336 		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
337 		return (0);
338 	}
339 	return (ENOENT);
340 }
341 
342 int
343 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
344 {
345 	struct pptdev *ppt;
346 
347 	ppt = ppt_find(bus, slot, func);
348 	if (ppt != NULL) {
349 		/*
350 		 * If this device is not owned by this 'vm' then bail out.
351 		 */
352 		if (ppt->vm != vm)
353 			return (EBUSY);
354 		ppt_unmap_mmio(vm, ppt);
355 		ppt_teardown_msi(ppt);
356 		ppt_teardown_msix(ppt);
357 		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
358 		ppt->vm = NULL;
359 		return (0);
360 	}
361 	return (ENOENT);
362 }
363 
364 int
365 ppt_unassign_all(struct vm *vm)
366 {
367 	int i, bus, slot, func;
368 	device_t dev;
369 
370 	for (i = 0; i < num_pptdevs; i++) {
371 		if (pptdevs[i].vm == vm) {
372 			dev = pptdevs[i].dev;
373 			bus = pci_get_bus(dev);
374 			slot = pci_get_slot(dev);
375 			func = pci_get_function(dev);
376 			vm_unassign_pptdev(vm, bus, slot, func);
377 		}
378 	}
379 
380 	return (0);
381 }
382 
383 int
384 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
385 	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
386 {
387 	int i, error;
388 	struct vm_memory_segment *seg;
389 	struct pptdev *ppt;
390 
391 	ppt = ppt_find(bus, slot, func);
392 	if (ppt != NULL) {
393 		if (ppt->vm != vm)
394 			return (EBUSY);
395 
396 		for (i = 0; i < MAX_MMIOSEGS; i++) {
397 			seg = &ppt->mmio[i];
398 			if (seg->len == 0) {
399 				error = vm_map_mmio(vm, gpa, len, hpa);
400 				if (error == 0) {
401 					seg->gpa = gpa;
402 					seg->len = len;
403 				}
404 				return (error);
405 			}
406 		}
407 		return (ENOSPC);
408 	}
409 	return (ENOENT);
410 }
411 
412 static int
413 pptintr(void *arg)
414 {
415 	int vec;
416 	struct pptdev *ppt;
417 	struct pptintr_arg *pptarg;
418 
419 	pptarg = arg;
420 	ppt = pptarg->pptdev;
421 	vec = pptarg->vec;
422 
423 	if (ppt->vm != NULL)
424 		(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
425 	else {
426 		/*
427 		 * XXX
428 		 * This is not expected to happen - panic?
429 		 */
430 	}
431 
432 	/*
433 	 * For legacy interrupts give other filters a chance in case
434 	 * the interrupt was not generated by the passthrough device.
435 	 */
436 	if (ppt->msi.startrid == 0)
437 		return (FILTER_STRAY);
438 	else
439 		return (FILTER_HANDLED);
440 }
441 
442 int
443 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
444 	      int destcpu, int vector, int numvec)
445 {
446 	int i, rid, flags;
447 	int msi_count, startrid, error, tmp;
448 	struct pptdev *ppt;
449 
450 	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
451 	    (vector < 0 || vector > 255) ||
452 	    (numvec < 0 || numvec > MAX_MSIMSGS))
453 		return (EINVAL);
454 
455 	ppt = ppt_find(bus, slot, func);
456 	if (ppt == NULL)
457 		return (ENOENT);
458 	if (ppt->vm != vm)		/* Make sure we own this device */
459 		return (EBUSY);
460 
461 	/* Free any allocated resources */
462 	ppt_teardown_msi(ppt);
463 
464 	if (numvec == 0)		/* nothing more to do */
465 		return (0);
466 
467 	flags = RF_ACTIVE;
468 	msi_count = pci_msi_count(ppt->dev);
469 	if (msi_count == 0) {
470 		startrid = 0;		/* legacy interrupt */
471 		msi_count = 1;
472 		flags |= RF_SHAREABLE;
473 	} else
474 		startrid = 1;		/* MSI */
475 
476 	/*
477 	 * The device must be capable of supporting the number of vectors
478 	 * the guest wants to allocate.
479 	 */
480 	if (numvec > msi_count)
481 		return (EINVAL);
482 
483 	/*
484 	 * Make sure that we can allocate all the MSI vectors that are needed
485 	 * by the guest.
486 	 */
487 	if (startrid == 1) {
488 		tmp = numvec;
489 		error = pci_alloc_msi(ppt->dev, &tmp);
490 		if (error)
491 			return (error);
492 		else if (tmp != numvec) {
493 			pci_release_msi(ppt->dev);
494 			return (ENOSPC);
495 		} else {
496 			/* success */
497 		}
498 	}
499 
500 	ppt->msi.startrid = startrid;
501 
502 	/*
503 	 * Allocate the irq resource and attach it to the interrupt handler.
504 	 */
505 	for (i = 0; i < numvec; i++) {
506 		ppt->msi.num_msgs = i + 1;
507 		ppt->msi.cookie[i] = NULL;
508 
509 		rid = startrid + i;
510 		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
511 							 &rid, flags);
512 		if (ppt->msi.res[i] == NULL)
513 			break;
514 
515 		ppt->msi.arg[i].pptdev = ppt;
516 		ppt->msi.arg[i].vec = vector + i;
517 		ppt->msi.arg[i].vcpu = destcpu;
518 
519 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
520 				       INTR_TYPE_NET | INTR_MPSAFE,
521 				       pptintr, NULL, &ppt->msi.arg[i],
522 				       &ppt->msi.cookie[i]);
523 		if (error != 0)
524 			break;
525 	}
526 
527 	if (i < numvec) {
528 		ppt_teardown_msi(ppt);
529 		return (ENXIO);
530 	}
531 
532 	return (0);
533 }
534 
535 int
536 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
537 	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
538 {
539 	struct pptdev *ppt;
540 	struct pci_devinfo *dinfo;
541 	int numvec, alloced, rid, error;
542 	size_t res_size, cookie_size, arg_size;
543 
544 	ppt = ppt_find(bus, slot, func);
545 	if (ppt == NULL)
546 		return (ENOENT);
547 	if (ppt->vm != vm)		/* Make sure we own this device */
548 		return (EBUSY);
549 
550 	dinfo = device_get_ivars(ppt->dev);
551 	if (!dinfo)
552 		return (ENXIO);
553 
554 	/*
555 	 * First-time configuration:
556 	 * 	Allocate the MSI-X table
557 	 *	Allocate the IRQ resources
558 	 *	Set up some variables in ppt->msix
559 	 */
560 	if (ppt->msix.num_msgs == 0) {
561 		numvec = pci_msix_count(ppt->dev);
562 		if (numvec <= 0)
563 			return (EINVAL);
564 
565 		ppt->msix.startrid = 1;
566 		ppt->msix.num_msgs = numvec;
567 
568 		res_size = numvec * sizeof(ppt->msix.res[0]);
569 		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
570 		arg_size = numvec * sizeof(ppt->msix.arg[0]);
571 
572 		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
573 		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
574 					  M_WAITOK | M_ZERO);
575 		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
576 
577 		rid = dinfo->cfg.msix.msix_table_bar;
578 		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
579 					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
580 
581 		if (ppt->msix.msix_table_res == NULL) {
582 			ppt_teardown_msix(ppt);
583 			return (ENOSPC);
584 		}
585 		ppt->msix.msix_table_rid = rid;
586 
587 		alloced = numvec;
588 		error = pci_alloc_msix(ppt->dev, &alloced);
589 		if (error || alloced != numvec) {
590 			ppt_teardown_msix(ppt);
591 			return (error == 0 ? ENOSPC: error);
592 		}
593 	}
594 
595 	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
596 		/* Tear down the IRQ if it's already set up */
597 		ppt_teardown_msix_intr(ppt, idx);
598 
599 		/* Allocate the IRQ resource */
600 		ppt->msix.cookie[idx] = NULL;
601 		rid = ppt->msix.startrid + idx;
602 		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
603 							    &rid, RF_ACTIVE);
604 		if (ppt->msix.res[idx] == NULL)
605 			return (ENXIO);
606 
607 		ppt->msix.arg[idx].pptdev = ppt;
608 		ppt->msix.arg[idx].vec = msg & 0xFF;
609 		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
610 
611 		/* Setup the MSI-X interrupt */
612 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
613 				       INTR_TYPE_NET | INTR_MPSAFE,
614 				       pptintr, NULL, &ppt->msix.arg[idx],
615 				       &ppt->msix.cookie[idx]);
616 
617 		if (error != 0) {
618 			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
619 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
620 			ppt->msix.cookie[idx] = NULL;
621 			ppt->msix.res[idx] = NULL;
622 			return (ENXIO);
623 		}
624 	} else {
625 		/* Masked, tear it down if it's already been set up */
626 		ppt_teardown_msix_intr(ppt, idx);
627 	}
628 
629 	return (0);
630 }
631