/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>
#include <sys/ppt_dev.h>
#include <sys/mkdev.h>
#include <sys/sysmacros.h>

#include "vmm_lapic.h"

#include "iommu.h"
#include "ppt.h"

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

struct pptintr_arg {
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

struct pptbar {
	uint64_t base;
	uint64_t size;
	uint_t type;
	ddi_acc_handle_t io_handle;
	caddr_t io_ptr;
	uint_t ddireg;
};

struct pptdev {
	dev_info_t		*pptd_dip;
	list_node_t		pptd_node;
	ddi_acc_handle_t	pptd_cfg;
	struct pptbar		pptd_bars[PCI_BASE_NUM];
	struct vm		*vm;
	struct pptseg mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */
		boolean_t is_fixed;
		size_t	inth_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int num_msgs;
		size_t inth_sz;
		size_t arg_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg *arg;
	} msix;
};


static major_t		ppt_major;
static void		*ppt_state;
static kmutex_t		pptdev_mtx;
static list_t		pptdev_list;

#define	PPT_MINOR_NAME	"ppt"

static ddi_device_acc_attr_t ppt_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

static int
ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	/* XXX: require extra privs? */
	return (0);
}

#define	BAR_TO_IDX(bar)	(((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
#define	BAR_VALID(b)	(			\
		(b) >= PCI_CONF_BASE0 &&	\
		(b) <= PCI_CONF_BASE5 &&	\
		((b) & (PCI_BAR_SZ_32-1)) == 0)

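/*
 * The ioctl interface below gives userspace raw access to the device's
 * config space and its I/O-port BARs.  As a rough sketch (hypothetical
 * userspace usage; the actual device path depends on how the ppt minor
 * node is exposed), reading a 32-bit register from an I/O BAR might look
 * like:
 *
 *	int fd = open("/dev/ppt0", O_RDWR);	// hypothetical path
 *	struct ppt_bar_io bio = {
 *		.pbi_bar = 0,
 *		.pbi_off = 0x10,
 *		.pbi_width = 4,
 *	};
 *	if (fd >= 0 && ioctl(fd, PPT_BAR_READ, &bio) == 0)
 *		(void) printf("reg = %x\n", bio.pbi_data);
 */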
static int
ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	struct pptdev *ppt;
	void *data = (void *)arg;

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
		return (ENOENT);
	}

	switch (cmd) {
	case PPT_CFG_READ: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
			break;
		case 2:
			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
			break;
		case 1:
			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_CFG_WRITE: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
			break;
		case 2:
			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
			break;
		case 1:
			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	case PPT_BAR_QUERY: {
		struct ppt_bar_query barg;
		struct pptbar *pbar;

		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		if (barg.pbq_baridx >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[barg.pbq_baridx];

		if (pbar->base == 0 || pbar->size == 0) {
			return (ENOENT);
		}
		barg.pbq_type = pbar->type;
		barg.pbq_base = pbar->base;
		barg.pbq_size = pbar->size;

		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_READ: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;
		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
			break;
		case 2:
			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
			break;
		case 1:
			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_WRITE: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;
		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 2:
			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 1:
			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}

	default:
		return (ENOTTY);
	}

	return (0);
}

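/*
 * Return the index of the BAR holding the MSI-X table (the BIR field from
 * the MSI-X capability), or -1 if the device has no MSI-X capability or the
 * table-offset register cannot be read.
 */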
static int
ppt_find_msix_table_bar(struct pptdev *ppt)
{
	uint16_t base;
	uint32_t off;

	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) !=
	    DDI_SUCCESS)
		return (-1);

	off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_TBL_OFFSET);

	if (off == PCI_EINVAL32)
		return (-1);

	return (off & PCI_MSIX_TBL_BIR_MASK);
}

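/*
 * devmap(9E) entry point.  Only the BAR containing the MSI-X table may be
 * mapped; presumably this is what the userspace MSI-X emulation mmap()s in
 * order to access the table on behalf of the guest.  The offset must be
 * page-aligned.
 */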
static int
ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	minor_t minor;
	struct pptdev *ppt;
	int err, bar;
	uint_t ddireg;

	minor = getminor(dev);

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
		return (ENXIO);

#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
		return (ENXIO);
#endif

	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
		return (EINVAL);

	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
		return (EINVAL);

	ddireg = ppt->pptd_bars[bar].ddireg;

	if (ddireg == 0)
		return (EINVAL);

	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);

	if (err == DDI_SUCCESS)
		*maplen = len;

	return (err);
}

static void
ppt_bar_wipe(struct pptdev *ppt)
{
	uint_t i;

	for (i = 0; i < PCI_BASE_NUM; i++) {
		struct pptbar *pbar = &ppt->pptd_bars[i];
		if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
			ddi_regs_map_free(&pbar->io_handle);
		}
	}
	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
}

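/*
 * Walk the device's "assigned-addresses" property and record the type, base
 * and size of each BAR in pptd_bars[].  I/O-port BARs are also mapped so
 * that the PPT_BAR_READ/PPT_BAR_WRITE ioctls can reach them.
 */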
static int
ppt_bar_crawl(struct pptdev *ppt)
{
	pci_regspec_t *regs;
	uint_t rcount, i;
	int err = 0, rlen;

	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
		return (EIO);
	}

	VERIFY3S(rlen, >, 0);
	rcount = rlen / sizeof (pci_regspec_t);
	for (i = 0; i < rcount; i++) {
		pci_regspec_t *reg = &regs[i];
		struct pptbar *pbar;
		uint_t bar, rnum;

		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
		bar = PCI_REG_REG_G(reg->pci_phys_hi);
		if (!BAR_VALID(bar)) {
			continue;
		}

		rnum = BAR_TO_IDX(bar);
		pbar = &ppt->pptd_bars[rnum];
		/* is this somehow already populated? */
		if (pbar->base != 0 || pbar->size != 0) {
			err = EEXIST;
			break;
		}

		/*
		 * Register 0 corresponds to the PCI config space.
		 * The registers which match the assigned-addresses list are
		 * offset by 1.
		 */
		pbar->ddireg = i + 1;

		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
		    (uint64_t)reg->pci_phys_low;
		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
		    (uint64_t)reg->pci_size_low;
		if (pbar->type == PCI_ADDR_IO) {
			err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
			    &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
			if (err != 0) {
				break;
			}
		}
	}
	kmem_free(regs, rlen);

	if (err != 0) {
		ppt_bar_wipe(ppt);
	}
	return (err);
}

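/*
 * Check that [base, base + size) is page-aligned and falls entirely within
 * one of the device's memory BARs, i.e. that a requested guest MMIO mapping
 * actually targets this device.
 */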
static boolean_t
ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
{
	const uint64_t map_end = base + size;

	/* Zero-length or overflow mappings are not valid */
	if (map_end <= base) {
		return (B_FALSE);
	}
	/* MMIO bounds should be page-aligned */
	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
		return (B_FALSE);
	}

	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
		const struct pptbar *bar = &ppt->pptd_bars[i];
		const uint64_t bar_end = bar->base + bar->size;

		/* Only memory BARs can be mapped */
		if (bar->type != PCI_ADDR_MEM32 &&
		    bar->type != PCI_ADDR_MEM64) {
			continue;
		}

		/* Does the mapping fit within this BAR? */
		if (base < bar->base || base >= bar_end ||
		    map_end < bar->base || map_end > bar_end) {
			continue;
		}

		/* This BAR satisfies the provided map */
		return (B_TRUE);
	}
	return (B_FALSE);
}

static boolean_t
ppt_toggle_bar(struct pptdev *ppt, boolean_t enable)
{
	/*
	 * Enable/disable bus mastering and BAR decoding based on the BAR
	 * configuration. Bhyve emulates the COMMAND register so we won't see
	 * the bits changing there.
	 */
	ddi_acc_handle_t hdl;
	uint16_t cmd;

	if (pci_config_setup(ppt->pptd_dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);

	if (enable) {
		cmd |= PCI_COMM_ME;

		for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
			const struct pptbar *bar = &ppt->pptd_bars[i];

			switch (bar->type) {
			case PCI_ADDR_MEM32:
			case PCI_ADDR_MEM64:
				cmd |= PCI_COMM_MAE;
				break;
			case PCI_ADDR_IO:
				cmd |= PCI_COMM_IO;
				break;
			}
		}
	} else {
		cmd &= ~(PCI_COMM_ME | PCI_COMM_MAE | PCI_COMM_IO);
	}

	pci_config_put16(hdl, PCI_CONF_COMM, cmd);
	pci_config_teardown(&hdl);

	return (B_TRUE);
}

static int
ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct pptdev *ppt = NULL;
	int inst;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	inst = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
		goto fail;
	}
	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
	ppt->pptd_dip = dip;
	ddi_set_driver_private(dip, ppt);

	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
		goto fail;
	}
	if (ppt_bar_crawl(ppt) != 0) {
		goto fail;
	}
	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	ppt_toggle_bar(ppt, B_FALSE);

	mutex_enter(&pptdev_mtx);
	list_insert_tail(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	return (DDI_SUCCESS);

fail:
	if (ppt != NULL) {
		ddi_remove_minor_node(dip, NULL);
		if (ppt->pptd_cfg != NULL) {
			pci_config_teardown(&ppt->pptd_cfg);
		}
		ppt_bar_wipe(ppt);
		ddi_soft_state_free(ppt_state, inst);
	}
	return (DDI_FAILURE);
}

static int
ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct pptdev *ppt;
	int inst;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ppt = ddi_get_driver_private(dip);
	inst = ddi_get_instance(dip);

	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);

	mutex_enter(&pptdev_mtx);
	if (ppt->vm != NULL) {
		mutex_exit(&pptdev_mtx);
		return (DDI_FAILURE);
	}
	list_remove(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
	ppt_bar_wipe(ppt);
	pci_config_teardown(&ppt->pptd_cfg);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(ppt_state, inst);

	return (DDI_SUCCESS);
}

static int
ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int error = DDI_FAILURE;
	int inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO: {
		struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst);

		if (ppt != NULL) {
			*result = (void *)ppt->pptd_dip;
			error = DDI_SUCCESS;
		}
		break;
	}
	case DDI_INFO_DEVT2INSTANCE: {
		*result = (void *)(uintptr_t)inst;
		error = DDI_SUCCESS;
		break;
	}
	default:
		break;
	}
	return (error);
}

static struct cb_ops ppt_cb_ops = {
	ppt_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	ppt_ioctl,
	ppt_devmap,	/* devmap */
	NULL,		/* mmap */
	NULL,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,
	D_NEW | D_MP | D_64BIT | D_DEVMAP,
	CB_REV
};

static struct dev_ops ppt_ops = {
	DEVO_REV,
	0,
	ppt_ddi_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	ppt_ddi_attach,
	ppt_ddi_detach,
	nodev,		/* reset */
	&ppt_cb_ops,
	(struct bus_ops *)NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"bhyve pci pass-thru",
	&ppt_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&pptdev_list, sizeof (struct pptdev),
	    offsetof(struct pptdev, pptd_node));

	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
	if (error) {
		goto fail;
	}

	error = mod_install(&modlinkage);

	ppt_major = ddi_name_to_major("ppt");
fail:
	if (error) {
		ddi_soft_state_fini(&ppt_state);
	}
	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&modlinkage);
	if (error)
		return (error);
	ddi_soft_state_fini(&ppt_state);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

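/*
 * Poll the PCIe Device Status register until the Transactions Pending bit
 * clears, checking every 100ms up to max_delay_us.  Returns B_TRUE once the
 * device reports no pending transactions, or B_FALSE on timeout or if the
 * PCIe capability cannot be read.
 */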
static boolean_t
ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
{
	uint16_t cap_ptr, devsts;
	ddi_acc_handle_t hdl;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
		pci_config_teardown(&hdl);
		return (B_FALSE);
	}

	devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) {
		if (max_delay_us == 0) {
			pci_config_teardown(&hdl);
			return (B_FALSE);
		}

		/* Poll once every 100 milliseconds up to the timeout. */
		if (max_delay_us > 100000) {
			delay(drv_usectohz(100000));
			max_delay_us -= 100000;
		} else {
			delay(drv_usectohz(max_delay_us));
			max_delay_us = 0;
		}
		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	}

	pci_config_teardown(&hdl);
	return (B_TRUE);
}

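/*
 * Determine the device's maximum PCIe Completion Timeout, in microseconds.
 * For PCIe 2.0+ devices which advertise programmable completion timeout
 * ranges, the value programmed in Device Control 2 is translated via
 * timo_ranges[]; otherwise the spec default of 50ms is assumed.
 */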
static uint_t
ppt_max_completion_tmo_us(dev_info_t *dip)
{
	uint_t timo = 0;
	uint16_t cap_ptr;
	ddi_acc_handle_t hdl;
	uint_t timo_ranges[] = {	/* timeout ranges */
		50000,		/* 50ms */
		100,		/* 100us */
		10000,		/* 10ms */
		0,
		0,
		55000,		/* 55ms */
		210000,		/* 210ms */
		0,
		0,
		900000,		/* 900ms */
		3500000,	/* 3.5s */
		0,
		0,
		13000000,	/* 13s */
		64000000,	/* 64s */
		0
	};

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (50000); /* default 50ms */

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto out;

	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
		goto out;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
	    PCIE_DEVCTL2_COM_TO_RANGE_MASK) == 0)
		goto out;

	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
	    PCIE_DEVCAP2_COM_TO_RANGE_MASK];

out:
	if (timo == 0)
		timo = 50000; /* default 50ms */

	pci_config_teardown(&hdl);
	return (timo);
}

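/*
 * Perform a Function Level Reset on the device: quiesce it by clearing bus
 * mastering, wait for pending transactions to drain (up to the device's
 * completion timeout), set the Initiate FLR bit, then wait the 100ms
 * required by the PCIe spec before the function is touched again.  If
 * 'force' is set, the reset proceeds even when the device fails to go idle.
 */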
static boolean_t
ppt_flr(dev_info_t *dip, boolean_t force)
{
	uint16_t cap_ptr, ctl, cmd;
	ddi_acc_handle_t hdl;
	uint_t compl_delay = 0, max_delay_us;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto fail;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
	    == 0)
		goto fail;

	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);

	/*
	 * Disable busmastering to prevent generation of new transactions while
	 * waiting for the device to go idle.  If the idle timeout fails, the
	 * command register is restored which will re-enable busmastering.
	 */
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
		if (!force) {
			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
			goto fail;
		}
		dev_err(dip, CE_WARN,
		    "?Resetting with transactions pending after %u us\n",
		    max_delay_us);

		/*
		 * Extend the post-FLR delay to cover the maximum Completion
		 * Timeout delay of anything in flight during the FLR delay.
		 * Enforce a minimum delay of at least 10ms.
		 */
		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
	}

	/* Initiate the reset. */
	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
	    ctl | PCIE_DEVCTL_INITIATE_FLR);

	/* Wait for at least 100ms */
	delay(drv_usectohz((100 + compl_delay) * 1000));

	pci_config_teardown(&hdl);
	return (B_TRUE);

fail:
	/*
	 * TODO: If the FLR fails for some reason, we should attempt a reset
	 * using the PCI power management facilities (if possible).
	 */
	pci_config_teardown(&hdl);
	return (B_FALSE);
}

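/*
 * Translate a ppt device file descriptor (as passed in from the VMM ioctl
 * path) into its pptdev.  The descriptor must refer to a ppt minor node,
 * and the device must currently be owned by 'vm' (or be unowned, when vm is
 * NULL).  On success the file handle remains held and the caller must
 * releasef() it.
 */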
static int
ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
{
	struct pptdev *ppt = NULL;
	file_t *fp;
	vattr_t va;
	int err = 0;

	ASSERT(MUTEX_HELD(&pptdev_mtx));

	if ((fp = getf(fd)) == NULL)
		return (EBADF);

	va.va_mask = AT_RDEV;
	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
	    getmajor(va.va_rdev) != ppt_major) {
		err = EBADF;
		goto fail;
	}

	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));

	if (ppt == NULL) {
		err = EBADF;
		goto fail;
	}

	if (ppt->vm != vm) {
		err = EBUSY;
		goto fail;
	}

	*pptp = ppt;
	return (0);

fail:
	releasef(fd);
	return (err);
}

static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof (struct pptseg));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msi.inth[i], 1);
		else
			ddi_intr_disable(ppt->msi.inth[i]);

		ddi_intr_remove_handler(ppt->msi.inth[i]);
		ddi_intr_free(ppt->msi.inth[i]);

		ppt->msi.inth[i] = NULL;
	}

	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
	ppt->msi.inth = NULL;
	ppt->msi.inth_sz = 0;
	ppt->msi.is_fixed = B_FALSE;

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
		else
			ddi_intr_disable(ppt->msix.inth[idx]);

		ddi_intr_remove_handler(ppt->msix.inth[idx]);
	}
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	uint_t i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.inth) {
		for (i = 0; i < ppt->msix.num_msgs; i++)
			ddi_intr_free(ppt->msix.inth[i]);
		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
		ppt->msix.inth = NULL;
		ppt->msix.inth_sz = 0;
		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
		ppt->msix.arg = NULL;
		ppt->msix.arg_sz = 0;
	}

	ppt->msix.num_msgs = 0;
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	uint_t num = 0;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			num++;
		}
	}
	mutex_exit(&pptdev_mtx);
	return (num);
}

boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	struct pptdev *ppt;

	/* XXX: this should probably be restructured to avoid the lock */
	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm != vm) {
			continue;
		}

		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
			struct pptseg *seg = &ppt->mmio[i];

			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
				mutex_exit(&pptdev_mtx);
				return (B_TRUE);
			}
		}
	}

	mutex_exit(&pptdev_mtx);
	return (B_FALSE);
}

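/*
 * Assign an unowned ppt device to a VM: save its config registers, reset it
 * via FLR, restore and re-save that state (so a "pristine" copy can be
 * restored at unassign time), enable BAR decoding, and move the device from
 * the host IOMMU domain into the VM's domain.
 */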
int
ppt_assign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	/* Passing NULL requires the device to be unowned. */
	err = ppt_findf(NULL, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore the device state after reset and then perform another save
	 * so the "pristine" state can be restored when the device is removed
	 * from the guest.
	 */
	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}

	ppt_toggle_bar(ppt, B_TRUE);

	ppt->vm = vm;
	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	pf_set_passthru(ppt->pptd_dip, B_TRUE);

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

static void
ppt_reset_pci_power_state(dev_info_t *dip)
{
	ddi_acc_handle_t cfg;
	uint16_t cap_ptr;

	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
		return;

	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
		uint16_t val;

		val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
		if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
			val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
			    val);
		}
	}

	pci_config_teardown(&cfg);
}

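/*
 * Undo a device assignment: reset the device, force it back to the D0 power
 * state (a later power-state transition would clobber register state during
 * the restore), restore the saved "pristine" config, tear down any guest
 * MMIO mappings and MSI/MSI-X state, and return the device to the host
 * IOMMU domain.
 */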
static void
ppt_do_unassign(struct pptdev *ppt)
{
	struct vm *vm = ppt->vm;

	ASSERT3P(vm, !=, NULL);
	ASSERT(MUTEX_HELD(&pptdev_mtx));

	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore from the state saved during device assignment.
	 * If the device power state has been altered, that must be remedied
	 * first, as it will reset register state during the transition.
	 */
	ppt_reset_pci_power_state(ppt->pptd_dip);
	(void) pci_restore_config_regs(ppt->pptd_dip);

	pf_set_passthru(ppt->pptd_dip, B_FALSE);

	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	ppt->vm = NULL;
}

int
ppt_unassign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_do_unassign(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

void
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			ppt_do_unassign(ppt);
		}
	}
	mutex_exit(&pptdev_mtx);
}

int
ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct pptdev *ppt;
	int err = 0;

	if ((len & PAGEOFFSET) != 0 || len == 0 || (gpa & PAGEOFFSET) != 0 ||
	    (hpa & PAGEOFFSET) != 0 || gpa + len < gpa || hpa + len < hpa) {
		return (EINVAL);
	}

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/*
	 * Ensure that the host-physical range of the requested mapping fits
	 * within one of the MMIO BARs of the device.
	 */
	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
		err = EINVAL;
		goto done;
	}

	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->len == 0) {
			err = vm_map_mmio(vm, gpa, len, hpa);
			if (err == 0) {
				seg->gpa = gpa;
				seg->len = len;
			}
			goto done;
		}
	}
	err = ENOSPC;

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct pptdev *ppt;
	int err = 0;
	uint_t i;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->gpa == gpa && seg->len == len) {
			err = vm_unmap_mmio(vm, seg->gpa, seg->len);
			if (err == 0) {
				seg->gpa = 0;
				seg->len = 0;
			}
			goto out;
		}
	}
	err = ENOENT;
out:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

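/*
 * Interrupt handler shared by all passthrough vectors.  The host takes the
 * physical interrupt and injects the corresponding MSI (the address/data
 * pair programmed by the guest) into the guest's local APIC.
 */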
static uint_t
pptintr(caddr_t arg, caddr_t unused)
{
	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
	struct pptdev *ppt = pptarg->pptdev;

	if (ppt->vm != NULL) {
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	} else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
}

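/*
 * Configure guest MSI delivery: allocate numvec host vectors (falling back
 * to a fixed interrupt when the device lacks MSI support) and wire each one
 * to pptintr() with the guest-programmed address/data.  A numvec of 0
 * simply tears down any existing MSI state.  Consecutive vectors use
 * consecutive message data values (msg + i), matching the multi-message MSI
 * convention.
 */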
int
ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
    int numvec)
{
	int i, msi_count, intr_type;
	struct pptdev *ppt;
	int err = 0;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0) {
		err = EBUSY;
		goto done;
	}

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0) {
		/* nothing more to do */
		goto done;
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    &msi_count) != DDI_SUCCESS) {
		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
		    &msi_count) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		intr_type = DDI_INTR_TYPE_FIXED;
		ppt->msi.is_fixed = B_TRUE;
	} else {
		intr_type = DDI_INTR_TYPE_MSI;
	}

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count) {
		err = EINVAL;
		goto done;
	}

	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
	    numvec, &msi_count, 0) != DDI_SUCCESS) {
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		err = EINVAL;
		goto done;
	}

	/* Verify that we got as many vectors as the guest requested */
	if (numvec != msi_count) {
		ppt_teardown_msi(ppt);
		err = EINVAL;
		goto done;
	}

	/* Set up & enable interrupt handler for each vector. */
	for (i = 0; i < numvec; i++) {
		int res, intr_cap = 0;

		ppt->msi.num_msgs = i + 1;
		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
			break;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
		else
			res = ddi_intr_enable(ppt->msi.inth[i]);

		if (res != DDI_SUCCESS)
			break;
	}
	if (i < numvec) {
		ppt_teardown_msi(ppt);
		err = ENXIO;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

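/*
 * Configure one MSI-X vector on behalf of the guest.  On first use, all of
 * the device's MSI-X vectors are allocated up front; subsequent calls set
 * up or tear down the handler for a single table entry, depending on
 * whether that entry's vector-control mask bit is clear or set.
 */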
int
ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
    uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	int numvec, alloced;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0) {
		err = EBUSY;
		goto done;
	}

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		dev_info_t *dip = ppt->pptd_dip;

		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
		    &numvec) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		ppt->msix.num_msgs = numvec;

		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);

		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
		    numvec, &alloced, 0) != DDI_SUCCESS) {
			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
			ppt->msix.arg = NULL;
			ppt->msix.inth = NULL;
			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
			err = EINVAL;
			goto done;
		}

		if (numvec != alloced) {
			ppt_teardown_msix(ppt);
			err = EINVAL;
			goto done;
		}
	}

	if (idx >= ppt->msix.num_msgs) {
		err = EINVAL;
		goto done;
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		int intr_cap, res;

		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
			err = ENXIO;
			goto done;
		}

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
		else
			res = ddi_intr_enable(ppt->msix.inth[idx]);

		if (res != DDI_SUCCESS) {
			ddi_intr_remove_handler(ppt->msix.inth[idx]);
			err = ENXIO;
			goto done;
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    msilimit) != DDI_SUCCESS) {
		*msilimit = -1;
	}
	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
	    msixlimit) != DDI_SUCCESS) {
		*msixlimit = -1;
	}

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_disable_msix(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_teardown_msix(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
1546