xref: /freebsd/sys/amd64/vmm/io/ppt.c (revision 9005607c8fa7317a759f1fc16cae4738f9a2fbb3)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/pciio.h>
39 #include <sys/rman.h>
40 #include <sys/smp.h>
41 
42 #include <dev/pci/pcivar.h>
43 #include <dev/pci/pcireg.h>
44 
45 #include <machine/resource.h>
46 
47 #include <machine/vmm.h>
48 #include <machine/vmm_dev.h>
49 
50 #include "vmm_lapic.h"
51 #include "vmm_ktr.h"
52 
53 #include "iommu.h"
54 #include "ppt.h"
55 
/* XXX locking: no locking is implemented yet for pptdevs[] / num_pptdevs */
57 
/*
 * Capacity limits for the passthrough bookkeeping declared below.
 */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))	/* slots in pptdevs[] */
#define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)	/* MMIO segments tracked per device */
#define	MAX_MSIMSGS	32	/* size of the per-device MSI arrays */

/* Malloc type used for the dynamically sized MSI-X arrays in struct pptdev. */
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
63 
/*
 * Per-vector argument registered together with pptintr(); one instance
 * exists for every MSI or MSI-X message that has a handler installed.
 */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;		/* device the handler belongs to */
	int		vec;			/* vector passed to lapic_set_intr() */
	int 		vcpu;			/* vcpu passed to lapic_set_intr() */
};
69 
/*
 * State kept for every attached passthrough device.  Entries are handed
 * out sequentially by ppt_attach(); 'vm' is NULL while the device is not
 * assigned to any guest.
 */
static struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	struct vm_memory_segment mmio[MAX_MMIOSEGS];	/* len == 0 marks a free slot */
	struct {
		int	num_msgs;		/* guest state */

		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	/*
	 * MSI-X state.  The three arrays are allocated from M_PPTMSIX with
	 * 'num_msgs' elements each the first time ppt_setup_msix() runs.
	 */
	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		struct resource *msix_table_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
} pptdevs[32];

/* Number of pptdevs[] entries currently in use. */
static int num_pptdevs;
95 
96 static int
97 ppt_probe(device_t dev)
98 {
99 	int bus, slot, func;
100 	struct pci_devinfo *dinfo;
101 
102 	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
103 
104 	bus = pci_get_bus(dev);
105 	slot = pci_get_slot(dev);
106 	func = pci_get_function(dev);
107 
108 	/*
109 	 * To qualify as a pci passthrough device a device must:
110 	 * - be allowed by administrator to be used in this role
111 	 * - be an endpoint device
112 	 */
113 	if (vmm_is_pptdev(bus, slot, func) &&
114 	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
115 		return (0);
116 	else
117 		return (ENXIO);
118 }
119 
120 static int
121 ppt_attach(device_t dev)
122 {
123 	int n;
124 
125 	if (num_pptdevs >= MAX_PPTDEVS) {
126 		printf("ppt_attach: maximum number of pci passthrough devices "
127 		       "exceeded\n");
128 		return (ENXIO);
129 	}
130 
131 	n = num_pptdevs++;
132 	pptdevs[n].dev = dev;
133 
134 	if (bootverbose)
135 		device_printf(dev, "attached\n");
136 
137 	return (0);
138 }
139 
140 static int
141 ppt_detach(device_t dev)
142 {
143 	/*
144 	 * XXX check whether there are any pci passthrough devices assigned
145 	 * to guests before we allow this driver to detach.
146 	 */
147 
148 	return (0);
149 }
150 
/*
 * Method table and driver registration: the 'ppt' driver is registered
 * on the 'pci' bus with the probe/attach/detach routines defined above.
 */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}				/* table terminator */
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
162 
163 static struct pptdev *
164 ppt_find(int bus, int slot, int func)
165 {
166 	device_t dev;
167 	int i, b, s, f;
168 
169 	for (i = 0; i < num_pptdevs; i++) {
170 		dev = pptdevs[i].dev;
171 		b = pci_get_bus(dev);
172 		s = pci_get_slot(dev);
173 		f = pci_get_function(dev);
174 		if (bus == b && slot == s && func == f)
175 			return (&pptdevs[i]);
176 	}
177 	return (NULL);
178 }
179 
180 static void
181 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
182 {
183 	int i;
184 	struct vm_memory_segment *seg;
185 
186 	for (i = 0; i < MAX_MMIOSEGS; i++) {
187 		seg = &ppt->mmio[i];
188 		if (seg->len == 0)
189 			continue;
190 		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
191 		bzero(seg, sizeof(struct vm_memory_segment));
192 	}
193 }
194 
195 static void
196 ppt_teardown_msi(struct pptdev *ppt)
197 {
198 	int i, rid;
199 	void *cookie;
200 	struct resource *res;
201 
202 	if (ppt->msi.num_msgs == 0)
203 		return;
204 
205 	for (i = 0; i < ppt->msi.num_msgs; i++) {
206 		rid = ppt->msi.startrid + i;
207 		res = ppt->msi.res[i];
208 		cookie = ppt->msi.cookie[i];
209 
210 		if (cookie != NULL)
211 			bus_teardown_intr(ppt->dev, res, cookie);
212 
213 		if (res != NULL)
214 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
215 
216 		ppt->msi.res[i] = NULL;
217 		ppt->msi.cookie[i] = NULL;
218 	}
219 
220 	if (ppt->msi.startrid == 1)
221 		pci_release_msi(ppt->dev);
222 
223 	ppt->msi.num_msgs = 0;
224 }
225 
226 static void
227 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
228 {
229 	int rid;
230 	struct resource *res;
231 	void *cookie;
232 
233 	rid = ppt->msix.startrid + idx;
234 	res = ppt->msix.res[idx];
235 	cookie = ppt->msix.cookie[idx];
236 
237 	if (cookie != NULL)
238 		bus_teardown_intr(ppt->dev, res, cookie);
239 
240 	if (res != NULL)
241 		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
242 
243 	ppt->msix.res[idx] = NULL;
244 	ppt->msix.cookie[idx] = NULL;
245 }
246 
247 static void
248 ppt_teardown_msix(struct pptdev *ppt)
249 {
250 	int i;
251 
252 	if (ppt->msix.num_msgs == 0)
253 		return;
254 
255 	for (i = 0; i < ppt->msix.num_msgs; i++)
256 		ppt_teardown_msix_intr(ppt, i);
257 
258 	if (ppt->msix.msix_table_res) {
259 		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
260 				     ppt->msix.msix_table_rid,
261 				     ppt->msix.msix_table_res);
262 		ppt->msix.msix_table_res = NULL;
263 		ppt->msix.msix_table_rid = 0;
264 	}
265 
266 	free(ppt->msix.res, M_PPTMSIX);
267 	free(ppt->msix.cookie, M_PPTMSIX);
268 	free(ppt->msix.arg, M_PPTMSIX);
269 
270 	pci_release_msi(ppt->dev);
271 
272 	ppt->msix.num_msgs = 0;
273 }
274 
275 int
276 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
277 {
278 	struct pptdev *ppt;
279 
280 	ppt = ppt_find(bus, slot, func);
281 	if (ppt != NULL) {
282 		/*
283 		 * If this device is owned by a different VM then we
284 		 * cannot change its owner.
285 		 */
286 		if (ppt->vm != NULL && ppt->vm != vm)
287 			return (EBUSY);
288 
289 		ppt->vm = vm;
290 		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
291 		return (0);
292 	}
293 	return (ENOENT);
294 }
295 
296 int
297 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
298 {
299 	struct pptdev *ppt;
300 
301 	ppt = ppt_find(bus, slot, func);
302 	if (ppt != NULL) {
303 		/*
304 		 * If this device is not owned by this 'vm' then bail out.
305 		 */
306 		if (ppt->vm != vm)
307 			return (EBUSY);
308 		ppt_unmap_mmio(vm, ppt);
309 		ppt_teardown_msi(ppt);
310 		ppt_teardown_msix(ppt);
311 		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
312 		ppt->vm = NULL;
313 		return (0);
314 	}
315 	return (ENOENT);
316 }
317 
318 int
319 ppt_unassign_all(struct vm *vm)
320 {
321 	int i, bus, slot, func;
322 	device_t dev;
323 
324 	for (i = 0; i < num_pptdevs; i++) {
325 		if (pptdevs[i].vm == vm) {
326 			dev = pptdevs[i].dev;
327 			bus = pci_get_bus(dev);
328 			slot = pci_get_slot(dev);
329 			func = pci_get_function(dev);
330 			ppt_unassign_device(vm, bus, slot, func);
331 		}
332 	}
333 
334 	return (0);
335 }
336 
337 int
338 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
339 	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
340 {
341 	int i, error;
342 	struct vm_memory_segment *seg;
343 	struct pptdev *ppt;
344 
345 	ppt = ppt_find(bus, slot, func);
346 	if (ppt != NULL) {
347 		if (ppt->vm != vm)
348 			return (EBUSY);
349 
350 		for (i = 0; i < MAX_MMIOSEGS; i++) {
351 			seg = &ppt->mmio[i];
352 			if (seg->len == 0) {
353 				error = vm_map_mmio(vm, gpa, len, hpa);
354 				if (error == 0) {
355 					seg->gpa = gpa;
356 					seg->len = len;
357 				}
358 				return (error);
359 			}
360 		}
361 		return (ENOSPC);
362 	}
363 	return (ENOENT);
364 }
365 
366 static int
367 pptintr(void *arg)
368 {
369 	int vec;
370 	struct pptdev *ppt;
371 	struct pptintr_arg *pptarg;
372 
373 	pptarg = arg;
374 	ppt = pptarg->pptdev;
375 	vec = pptarg->vec;
376 
377 	if (ppt->vm != NULL)
378 		(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
379 	else {
380 		/*
381 		 * XXX
382 		 * This is not expected to happen - panic?
383 		 */
384 	}
385 
386 	/*
387 	 * For legacy interrupts give other filters a chance in case
388 	 * the interrupt was not generated by the passthrough device.
389 	 */
390 	if (ppt->msi.startrid == 0)
391 		return (FILTER_STRAY);
392 	else
393 		return (FILTER_HANDLED);
394 }
395 
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu that was originally handling the MSI. The function freeing
 * the MSI vector (apic_free_vector()) will panic the kernel if the thread
 * is already bound to a cpu.
 *
 * So, we temporarily unbind the vcpu thread before freeing the MSI
 * resource and restore the previous pinning afterwards.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int saved_cpu;

	/* Stays at -1 unless vm_get_pinning() stores a value. */
	saved_cpu = -1;
	vm_get_pinning(vm, vcpu, &saved_cpu);

	if (saved_cpu < 0) {
		/* No pinning recorded: nothing to undo or restore. */
		ppt_teardown_msi(ppt);
		return;
	}

	vm_set_pinning(vm, vcpu, -1);
	ppt_teardown_msi(ppt);
	vm_set_pinning(vm, vcpu, saved_cpu);
}
420 
421 int
422 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
423 	      int destcpu, int vector, int numvec)
424 {
425 	int i, rid, flags;
426 	int msi_count, startrid, error, tmp;
427 	struct pptdev *ppt;
428 
429 	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
430 	    (vector < 0 || vector > 255) ||
431 	    (numvec < 0 || numvec > MAX_MSIMSGS))
432 		return (EINVAL);
433 
434 	ppt = ppt_find(bus, slot, func);
435 	if (ppt == NULL)
436 		return (ENOENT);
437 	if (ppt->vm != vm)		/* Make sure we own this device */
438 		return (EBUSY);
439 
440 	/* Free any allocated resources */
441 	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
442 
443 	if (numvec == 0)		/* nothing more to do */
444 		return (0);
445 
446 	flags = RF_ACTIVE;
447 	msi_count = pci_msi_count(ppt->dev);
448 	if (msi_count == 0) {
449 		startrid = 0;		/* legacy interrupt */
450 		msi_count = 1;
451 		flags |= RF_SHAREABLE;
452 	} else
453 		startrid = 1;		/* MSI */
454 
455 	/*
456 	 * The device must be capable of supporting the number of vectors
457 	 * the guest wants to allocate.
458 	 */
459 	if (numvec > msi_count)
460 		return (EINVAL);
461 
462 	/*
463 	 * Make sure that we can allocate all the MSI vectors that are needed
464 	 * by the guest.
465 	 */
466 	if (startrid == 1) {
467 		tmp = numvec;
468 		error = pci_alloc_msi(ppt->dev, &tmp);
469 		if (error)
470 			return (error);
471 		else if (tmp != numvec) {
472 			pci_release_msi(ppt->dev);
473 			return (ENOSPC);
474 		} else {
475 			/* success */
476 		}
477 	}
478 
479 	ppt->msi.startrid = startrid;
480 
481 	/*
482 	 * Allocate the irq resource and attach it to the interrupt handler.
483 	 */
484 	for (i = 0; i < numvec; i++) {
485 		ppt->msi.num_msgs = i + 1;
486 		ppt->msi.cookie[i] = NULL;
487 
488 		rid = startrid + i;
489 		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
490 							 &rid, flags);
491 		if (ppt->msi.res[i] == NULL)
492 			break;
493 
494 		ppt->msi.arg[i].pptdev = ppt;
495 		ppt->msi.arg[i].vec = vector + i;
496 		ppt->msi.arg[i].vcpu = destcpu;
497 
498 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
499 				       INTR_TYPE_NET | INTR_MPSAFE,
500 				       pptintr, NULL, &ppt->msi.arg[i],
501 				       &ppt->msi.cookie[i]);
502 		if (error != 0)
503 			break;
504 	}
505 
506 	if (i < numvec) {
507 		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
508 		return (ENXIO);
509 	}
510 
511 	return (0);
512 }
513 
514 int
515 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
516 	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
517 {
518 	struct pptdev *ppt;
519 	struct pci_devinfo *dinfo;
520 	int numvec, alloced, rid, error;
521 	size_t res_size, cookie_size, arg_size;
522 
523 	ppt = ppt_find(bus, slot, func);
524 	if (ppt == NULL)
525 		return (ENOENT);
526 	if (ppt->vm != vm)		/* Make sure we own this device */
527 		return (EBUSY);
528 
529 	dinfo = device_get_ivars(ppt->dev);
530 	if (!dinfo)
531 		return (ENXIO);
532 
533 	/*
534 	 * First-time configuration:
535 	 * 	Allocate the MSI-X table
536 	 *	Allocate the IRQ resources
537 	 *	Set up some variables in ppt->msix
538 	 */
539 	if (ppt->msix.num_msgs == 0) {
540 		numvec = pci_msix_count(ppt->dev);
541 		if (numvec <= 0)
542 			return (EINVAL);
543 
544 		ppt->msix.startrid = 1;
545 		ppt->msix.num_msgs = numvec;
546 
547 		res_size = numvec * sizeof(ppt->msix.res[0]);
548 		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
549 		arg_size = numvec * sizeof(ppt->msix.arg[0]);
550 
551 		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
552 		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
553 					  M_WAITOK | M_ZERO);
554 		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
555 
556 		rid = dinfo->cfg.msix.msix_table_bar;
557 		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
558 					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
559 
560 		if (ppt->msix.msix_table_res == NULL) {
561 			ppt_teardown_msix(ppt);
562 			return (ENOSPC);
563 		}
564 		ppt->msix.msix_table_rid = rid;
565 
566 		alloced = numvec;
567 		error = pci_alloc_msix(ppt->dev, &alloced);
568 		if (error || alloced != numvec) {
569 			ppt_teardown_msix(ppt);
570 			return (error == 0 ? ENOSPC: error);
571 		}
572 	}
573 
574 	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
575 		/* Tear down the IRQ if it's already set up */
576 		ppt_teardown_msix_intr(ppt, idx);
577 
578 		/* Allocate the IRQ resource */
579 		ppt->msix.cookie[idx] = NULL;
580 		rid = ppt->msix.startrid + idx;
581 		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
582 							    &rid, RF_ACTIVE);
583 		if (ppt->msix.res[idx] == NULL)
584 			return (ENXIO);
585 
586 		ppt->msix.arg[idx].pptdev = ppt;
587 		ppt->msix.arg[idx].vec = msg;
588 		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
589 
590 		/* Setup the MSI-X interrupt */
591 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
592 				       INTR_TYPE_NET | INTR_MPSAFE,
593 				       pptintr, NULL, &ppt->msix.arg[idx],
594 				       &ppt->msix.cookie[idx]);
595 
596 		if (error != 0) {
597 			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
598 			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
599 			ppt->msix.cookie[idx] = NULL;
600 			ppt->msix.res[idx] = NULL;
601 			return (ENXIO);
602 		}
603 	} else {
604 		/* Masked, tear it down if it's already been set up */
605 		ppt_teardown_msix_intr(ppt, idx);
606 	}
607 
608 	return (0);
609 }
610 
611