/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>
#include <sys/ppt_dev.h>
#include <sys/mkdev.h>
#include <sys/sysmacros.h>

#include "vmm_lapic.h"

#include "iommu.h"
#include "ppt.h"
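/*
 * Maximum number of MSI vectors which may be configured for a single
 * passthrough device (see the fixed-size 'arg' array in struct pptdev).
 */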
#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate an MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

struct pptintr_arg {
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

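/*
 * State tracked for a single BAR of a passthrough device, as discovered
 * from the "assigned-addresses" property.  I/O-space BARs are additionally
 * mapped (io_handle/io_ptr) so they can be accessed via the
 * PPT_BAR_READ/PPT_BAR_WRITE ioctls.
 */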
struct pptbar {
	uint64_t	base;
	uint64_t	size;
	uint_t		type;
	ddi_acc_handle_t io_handle;
	caddr_t		io_ptr;
	uint_t		ddireg;
};

struct pptdev {
	dev_info_t		*pptd_dip;
	list_node_t		pptd_node;
	ddi_acc_handle_t	pptd_cfg;
	struct pptbar		pptd_bars[PCI_BASE_NUM];
	struct vm		*vm;
	struct pptseg		mmio[MAX_MMIOSEGS];
	struct {
		int		num_msgs;	/* guest state */
		boolean_t	is_fixed;
		size_t		inth_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int		num_msgs;
		size_t		inth_sz;
		size_t		arg_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg *arg;
	} msix;
};


static major_t		ppt_major;
static void		*ppt_state;
static kmutex_t		pptdev_mtx;
static list_t		pptdev_list;

#define	PPT_MINOR_NAME	"ppt"

static ddi_device_acc_attr_t ppt_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

static int
ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	/* XXX: require extra privs? */
	return (0);
}

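/*
 * Convert a BAR offset in PCI config space to an index into pptd_bars[],
 * and check that a given offset actually refers to a properly aligned BAR.
 */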
#define	BAR_TO_IDX(bar)	(((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
#define	BAR_VALID(b)	(			\
		(b) >= PCI_CONF_BASE0 &&	\
		(b) <= PCI_CONF_BASE5 &&	\
		((b) & (PCI_BAR_SZ_32-1)) == 0)

static int
ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	struct pptdev *ppt;
	void *data = (void *)arg;

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
		return (ENOENT);
	}

	switch (cmd) {
	case PPT_CFG_READ: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
			break;
		case 2:
			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
			break;
		case 1:
			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_CFG_WRITE: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
			break;
		case 2:
			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
			break;
		case 1:
			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	case PPT_BAR_QUERY: {
		struct ppt_bar_query barg;
		struct pptbar *pbar;

		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		if (barg.pbq_baridx >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[barg.pbq_baridx];

		if (pbar->base == 0 || pbar->size == 0) {
			return (ENOENT);
		}
		barg.pbq_type = pbar->type;
		barg.pbq_base = pbar->base;
		barg.pbq_size = pbar->size;

		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_READ: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
			break;
		case 2:
			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
			break;
		case 1:
			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_WRITE: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 2:
			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 1:
			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}

	default:
		return (ENOTTY);
	}

	return (0);
}

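/*
 * Locate the BAR containing the MSI-X table by reading the Table Offset/BIR
 * register from the MSI-X capability.  Returns the BAR index, or -1 if the
 * device lacks MSI-X or the register cannot be read.
 */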
static int
ppt_find_msix_table_bar(struct pptdev *ppt)
{
	uint16_t base;
	uint32_t off;

	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) !=
	    DDI_SUCCESS)
		return (-1);

	off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_TBL_OFFSET);

	if (off == PCI_EINVAL32)
		return (-1);

	return (off & PCI_MSIX_TBL_BIR_MASK);
}

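/*
 * Handle userspace mmap(2) of the device node.  Only the BAR containing the
 * MSI-X table is exposed here; mappings of other device memory are
 * established through PPT_MAP_MMIO instead.
 */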
static int
ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	minor_t minor;
	struct pptdev *ppt;
	int err, bar;
	uint_t ddireg;

	minor = getminor(dev);

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
		return (ENXIO);

#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
		return (ENXIO);
#endif

	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
		return (EINVAL);

	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
		return (EINVAL);

	ddireg = ppt->pptd_bars[bar].ddireg;

	if (ddireg == 0)
		return (EINVAL);

	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);

	if (err == DDI_SUCCESS)
		*maplen = len;

	return (err);
}

static void
ppt_bar_wipe(struct pptdev *ppt)
{
	uint_t i;

	for (i = 0; i < PCI_BASE_NUM; i++) {
		struct pptbar *pbar = &ppt->pptd_bars[i];
		if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
			ddi_regs_map_free(&pbar->io_handle);
		}
	}
	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
}

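/*
 * Walk the device's "assigned-addresses" property to populate pptd_bars[]
 * with the type, base, and size of each BAR, mapping any I/O-space BARs
 * along the way.
 */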
static int
ppt_bar_crawl(struct pptdev *ppt)
{
	pci_regspec_t *regs;
	uint_t rcount, i;
	int err = 0, rlen;

	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
		return (EIO);
	}

	VERIFY3S(rlen, >, 0);
	rcount = rlen / sizeof (pci_regspec_t);
	for (i = 0; i < rcount; i++) {
		pci_regspec_t *reg = &regs[i];
		struct pptbar *pbar;
		uint_t bar, rnum;

		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
		bar = PCI_REG_REG_G(reg->pci_phys_hi);
		if (!BAR_VALID(bar)) {
			continue;
		}

		rnum = BAR_TO_IDX(bar);
		pbar = &ppt->pptd_bars[rnum];
		/* is this somehow already populated? */
		if (pbar->base != 0 || pbar->size != 0) {
			err = EEXIST;
			break;
		}

		/*
		 * Register 0 corresponds to the PCI config space.
		 * The registers which match the assigned-addresses list are
		 * offset by 1.
		 */
		pbar->ddireg = i + 1;

		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
		    (uint64_t)reg->pci_phys_low;
		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
		    (uint64_t)reg->pci_size_low;
		if (pbar->type == PCI_ADDR_IO) {
			err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
			    &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
			if (err != 0) {
				break;
			}
		}
	}
	kmem_free(regs, rlen);

	if (err != 0) {
		ppt_bar_wipe(ppt);
	}
	return (err);
}

static boolean_t
ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
{
	const uint64_t map_end = base + size;

	/* Zero-length or overflow mappings are not valid */
	if (map_end <= base) {
		return (B_FALSE);
	}
	/* MMIO bounds should be page-aligned */
	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
		return (B_FALSE);
	}

	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
		const struct pptbar *bar = &ppt->pptd_bars[i];
		const uint64_t bar_end = bar->base + bar->size;

		/* Only memory BARs can be mapped */
		if (bar->type != PCI_ADDR_MEM32 &&
		    bar->type != PCI_ADDR_MEM64) {
			continue;
		}

		/* Does the mapping fit within this BAR? */
		if (base < bar->base || base >= bar_end ||
		    map_end < bar->base || map_end > bar_end) {
			continue;
		}

		/* This BAR satisfies the provided map */
		return (B_TRUE);
	}
	return (B_FALSE);
}

static boolean_t
ppt_toggle_bar(struct pptdev *ppt, boolean_t enable)
{
	/*
	 * Enable/disable bus mastering and BAR decoding based on the BAR
	 * configuration.  Bhyve emulates the COMMAND register so we won't
	 * see the bits changing there.
	 */
	ddi_acc_handle_t hdl;
	uint16_t cmd;

	if (pci_config_setup(ppt->pptd_dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);

	if (enable) {
		cmd |= PCI_COMM_ME;

		for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
			const struct pptbar *bar = &ppt->pptd_bars[i];

			switch (bar->type) {
			case PCI_ADDR_MEM32:
			case PCI_ADDR_MEM64:
				cmd |= PCI_COMM_MAE;
				break;
			case PCI_ADDR_IO:
				cmd |= PCI_COMM_IO;
				break;
			}
		}
	} else {
		cmd &= ~(PCI_COMM_ME | PCI_COMM_MAE | PCI_COMM_IO);
	}

	pci_config_put16(hdl, PCI_CONF_COMM, cmd);
	pci_config_teardown(&hdl);

	return (B_TRUE);
}

static int
ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct pptdev *ppt = NULL;
	int inst;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	inst = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
		goto fail;
	}
	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
	ppt->pptd_dip = dip;
	ddi_set_driver_private(dip, ppt);

	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
		goto fail;
	}
	if (ppt_bar_crawl(ppt) != 0) {
		goto fail;
	}
	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	ppt_toggle_bar(ppt, B_FALSE);

	mutex_enter(&pptdev_mtx);
	list_insert_tail(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	return (DDI_SUCCESS);

fail:
	if (ppt != NULL) {
		ddi_remove_minor_node(dip, NULL);
		if (ppt->pptd_cfg != NULL) {
			pci_config_teardown(&ppt->pptd_cfg);
		}
		ppt_bar_wipe(ppt);
		ddi_soft_state_free(ppt_state, inst);
	}
	return (DDI_FAILURE);
}

static int
ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct pptdev *ppt;
	int inst;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ppt = ddi_get_driver_private(dip);
	inst = ddi_get_instance(dip);

	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);

	mutex_enter(&pptdev_mtx);
	if (ppt->vm != NULL) {
		mutex_exit(&pptdev_mtx);
		return (DDI_FAILURE);
	}
	list_remove(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
	ppt_bar_wipe(ppt);
	pci_config_teardown(&ppt->pptd_cfg);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(ppt_state, inst);

	return (DDI_SUCCESS);
}

static int
ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int error = DDI_FAILURE;
	int inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO: {
		struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst);

		if (ppt != NULL) {
			*result = (void *)ppt->pptd_dip;
			error = DDI_SUCCESS;
		}
		break;
	}
	case DDI_INFO_DEVT2INSTANCE: {
		*result = (void *)(uintptr_t)inst;
		error = DDI_SUCCESS;
		break;
	}
	default:
		break;
	}
	return (error);
}

static struct cb_ops ppt_cb_ops = {
	ppt_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	ppt_ioctl,
	ppt_devmap,	/* devmap */
	NULL,		/* mmap */
	NULL,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,
	D_NEW | D_MP | D_64BIT | D_DEVMAP,
	CB_REV
};

static struct dev_ops ppt_ops = {
	DEVO_REV,
	0,
	ppt_ddi_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	ppt_ddi_attach,
	ppt_ddi_detach,
	nodev,		/* reset */
	&ppt_cb_ops,
	(struct bus_ops *)NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"bhyve pci pass-thru",
	&ppt_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&pptdev_list, sizeof (struct pptdev),
	    offsetof(struct pptdev, pptd_node));

	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
	if (error) {
		goto fail;
	}

	error = mod_install(&modlinkage);

	ppt_major = ddi_name_to_major("ppt");
fail:
	if (error) {
		ddi_soft_state_fini(&ppt_state);
	}
	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&modlinkage);
	if (error)
		return (error);
	ddi_soft_state_fini(&ppt_state);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

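/*
 * Wait up to max_delay_us for the Transactions Pending bit in the PCIe
 * Device Status register to clear, polling at 100ms intervals.  Returns
 * B_FALSE if the device did not go idle within the timeout.
 */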
static boolean_t
ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
{
	uint16_t cap_ptr, devsts;
	ddi_acc_handle_t hdl;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
		pci_config_teardown(&hdl);
		return (B_FALSE);
	}

	devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) {
		if (max_delay_us == 0) {
			pci_config_teardown(&hdl);
			return (B_FALSE);
		}

		/* Poll once every 100 milliseconds up to the timeout. */
		if (max_delay_us > 100000) {
			delay(drv_usectohz(100000));
			max_delay_us -= 100000;
		} else {
			delay(drv_usectohz(max_delay_us));
			max_delay_us = 0;
		}
		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	}

	pci_config_teardown(&hdl);
	return (B_TRUE);
}

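/*
 * Determine the maximum Completion Timeout for the device, in microseconds.
 * The table below is indexed by the 4-bit Completion Timeout Value encoding
 * from the PCIe Device Control 2 register; devices predating PCIe 2.0, or
 * reporting no supported timeout ranges, get the 50ms default.
 */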
static uint_t
ppt_max_completion_tmo_us(dev_info_t *dip)
{
	uint_t timo = 0;
	uint16_t cap_ptr;
	ddi_acc_handle_t hdl;
	uint_t timo_ranges[] = {	/* timeout ranges */
		50000,		/* 50ms */
		100,		/* 100us */
		10000,		/* 10ms */
		0,
		0,
		55000,		/* 55ms */
		210000,		/* 210ms */
		0,
		0,
		900000,		/* 900ms */
		3500000,	/* 3.5s */
		0,
		0,
		13000000,	/* 13s */
		64000000,	/* 64s */
		0
	};

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (50000); /* default 50ms */

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto out;

	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
		goto out;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
	    PCIE_DEVCTL2_COM_TO_RANGE_MASK) == 0)
		goto out;

	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
	    PCIE_DEVCAP2_COM_TO_RANGE_MASK];

out:
	if (timo == 0)
		timo = 50000; /* default 50ms */

	pci_config_teardown(&hdl);
	return (timo);
}

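/*
 * Perform a Function Level Reset on the device, first waiting for any
 * pending transactions to complete.  If 'force' is set, the reset proceeds
 * even when the device fails to go idle, with the post-FLR delay extended
 * to cover any completions still in flight.
 */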
static boolean_t
ppt_flr(dev_info_t *dip, boolean_t force)
{
	uint16_t cap_ptr, ctl, cmd;
	ddi_acc_handle_t hdl;
	uint_t compl_delay = 0, max_delay_us;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto fail;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
	    == 0)
		goto fail;

	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);

	/*
	 * Disable busmastering to prevent generation of new transactions
	 * while waiting for the device to go idle.  If the idle timeout
	 * fails, the command register is restored, which will re-enable
	 * busmastering.
	 */
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
		if (!force) {
			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
			goto fail;
		}
		dev_err(dip, CE_WARN,
		    "?Resetting with transactions pending after %u us\n",
		    max_delay_us);

		/*
		 * Extend the post-FLR delay to cover the maximum Completion
		 * Timeout delay of anything in flight during the FLR delay.
		 * Enforce a minimum delay of at least 10ms.
		 */
		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
	}

	/* Initiate the reset. */
	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
	    ctl | PCIE_DEVCTL_INITIATE_FLR);

	/* Wait for at least 100ms */
	delay(drv_usectohz((100 + compl_delay) * 1000));

	pci_config_teardown(&hdl);
	return (B_TRUE);

fail:
	/*
	 * TODO: If the FLR fails for some reason, we should attempt a reset
	 * using the PCI power management facilities (if possible).
	 */
	pci_config_teardown(&hdl);
	return (B_FALSE);
}

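/*
 * Look up the pptdev corresponding to an open ppt device fd, verifying that
 * it is owned by the given VM (or unowned, if 'vm' is NULL).  On success
 * the file reference remains held and the caller must releasef() it.
 */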
static int
ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
{
	struct pptdev *ppt = NULL;
	file_t *fp;
	vattr_t va;
	int err = 0;

	ASSERT(MUTEX_HELD(&pptdev_mtx));

	if ((fp = getf(fd)) == NULL)
		return (EBADF);

	va.va_mask = AT_RDEV;
	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
	    getmajor(va.va_rdev) != ppt_major) {
		err = EBADF;
		goto fail;
	}

	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));

	if (ppt == NULL) {
		err = EBADF;
		goto fail;
	}

	if (ppt->vm != vm) {
		err = EBUSY;
		goto fail;
	}

	*pptp = ppt;
	return (0);

fail:
	releasef(fd);
	return (err);
}

static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof (struct pptseg));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msi.inth[i], 1);
		else
			ddi_intr_disable(ppt->msi.inth[i]);

		ddi_intr_remove_handler(ppt->msi.inth[i]);
		ddi_intr_free(ppt->msi.inth[i]);

		ppt->msi.inth[i] = NULL;
	}

	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
	ppt->msi.inth = NULL;
	ppt->msi.inth_sz = 0;
	ppt->msi.is_fixed = B_FALSE;

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
		else
			ddi_intr_disable(ppt->msix.inth[idx]);

		ddi_intr_remove_handler(ppt->msix.inth[idx]);
	}
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	uint_t i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.inth) {
		for (i = 0; i < ppt->msix.num_msgs; i++)
			ddi_intr_free(ppt->msix.inth[i]);
		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
		ppt->msix.inth = NULL;
		ppt->msix.inth_sz = 0;
		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
		ppt->msix.arg = NULL;
		ppt->msix.arg_sz = 0;
	}

	ppt->msix.num_msgs = 0;
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	uint_t num = 0;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			num++;
		}
	}
	mutex_exit(&pptdev_mtx);
	return (num);
}

boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	struct pptdev *ppt;

	/* XXX: this should probably be restructured to avoid the lock */
	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm != vm) {
			continue;
		}

		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
			struct pptseg *seg = &ppt->mmio[i];

			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
				mutex_exit(&pptdev_mtx);
				return (B_TRUE);
			}
		}
	}

	mutex_exit(&pptdev_mtx);
	return (B_FALSE);
}

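/*
 * Assign a passthrough device to a VM: reset it via FLR, save its pristine
 * config state for later restoration, enable decoding, and move it from the
 * host IOMMU domain into the VM's domain.
 */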
int
ppt_assign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	/* Passing NULL requires the device to be unowned. */
	err = ppt_findf(NULL, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore the device state after reset and then perform another save
	 * so the "pristine" state can be restored when the device is removed
	 * from the guest.
	 */
	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}

	ppt_toggle_bar(ppt, B_TRUE);

	ppt->vm = vm;
	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	pf_set_passthru(ppt->pptd_dip, B_TRUE);

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

static void
ppt_reset_pci_power_state(dev_info_t *dip)
{
	ddi_acc_handle_t cfg;
	uint16_t cap_ptr;

	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
		return;

	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
		uint16_t val;

		val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
		if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
			val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
			    val);
		}
	}

	pci_config_teardown(&cfg);
}

static void
ppt_do_unassign(struct pptdev *ppt)
{
	struct vm *vm = ppt->vm;

	ASSERT3P(vm, !=, NULL);
	ASSERT(MUTEX_HELD(&pptdev_mtx));

	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore from the state saved during device assignment.
	 * If the device power state has been altered, that must be remedied
	 * first, as it will reset register state during the transition.
	 */
	ppt_reset_pci_power_state(ppt->pptd_dip);
	(void) pci_restore_config_regs(ppt->pptd_dip);

	pf_set_passthru(ppt->pptd_dip, B_FALSE);

	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	ppt->vm = NULL;
}

int
ppt_unassign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_do_unassign(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

void
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			ppt_do_unassign(ppt);
		}
	}
	mutex_exit(&pptdev_mtx);
}

int
ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct pptdev *ppt;
	int err = 0;

	if ((len & PAGEOFFSET) != 0 || len == 0 || (gpa & PAGEOFFSET) != 0 ||
	    (hpa & PAGEOFFSET) != 0 || gpa + len < gpa || hpa + len < hpa) {
		return (EINVAL);
	}

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/*
	 * Ensure that the host-physical range of the requested mapping fits
	 * within one of the MMIO BARs of the device.
	 */
	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
		err = EINVAL;
		goto done;
	}

	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->len == 0) {
			err = vm_map_mmio(vm, gpa, len, hpa);
			if (err == 0) {
				seg->gpa = gpa;
				seg->len = len;
			}
			goto done;
		}
	}
	err = ENOSPC;

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct pptdev *ppt;
	int err = 0;
	uint_t i;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->gpa == gpa && seg->len == len) {
			err = vm_unmap_mmio(vm, seg->gpa, seg->len);
			if (err == 0) {
				seg->gpa = 0;
				seg->len = 0;
			}
			goto out;
		}
	}
	err = ENOENT;
out:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

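/*
 * Interrupt handler shared by all passthrough vectors: inject the configured
 * MSI message into the owning guest's local APIC.
 */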
static uint_t
pptintr(caddr_t arg, caddr_t unused)
{
	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
	struct pptdev *ppt = pptarg->pptdev;

	if (ppt->vm != NULL) {
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	} else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
}

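/*
 * Configure MSI for a passthrough device: allocate 'numvec' host interrupt
 * vectors (falling back to a FIXED interrupt if MSI is unavailable) and wire
 * each one to pptintr() with the guest-specified address/data pair.
 */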
int
ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
    int numvec)
{
	int i, msi_count, intr_type;
	struct pptdev *ppt;
	int err = 0;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0) {
		err = EBUSY;
		goto done;
	}

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0) {
		/* nothing more to do */
		goto done;
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    &msi_count) != DDI_SUCCESS) {
		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
		    &msi_count) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		intr_type = DDI_INTR_TYPE_FIXED;
		ppt->msi.is_fixed = B_TRUE;
	} else {
		intr_type = DDI_INTR_TYPE_MSI;
	}

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count) {
		err = EINVAL;
		goto done;
	}

	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
	    numvec, &msi_count, 0) != DDI_SUCCESS) {
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		err = EINVAL;
		goto done;
	}

	/* Verify that we got as many vectors as the guest requested */
	if (numvec != msi_count) {
		ppt_teardown_msi(ppt);
		err = EINVAL;
		goto done;
	}

	/* Set up & enable interrupt handler for each vector. */
	for (i = 0; i < numvec; i++) {
		int res, intr_cap = 0;

		ppt->msi.num_msgs = i + 1;
		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
			break;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
		else
			res = ddi_intr_enable(ppt->msi.inth[i]);

		if (res != DDI_SUCCESS)
			break;
	}
	if (i < numvec) {
		ppt_teardown_msi(ppt);
		err = ENXIO;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
    uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	int numvec, alloced;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0) {
		err = EBUSY;
		goto done;
	}

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		dev_info_t *dip = ppt->pptd_dip;

		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
		    &numvec) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		ppt->msix.num_msgs = numvec;

		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);

		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
		    numvec, &alloced, 0) != DDI_SUCCESS) {
			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
			ppt->msix.arg = NULL;
			ppt->msix.inth = NULL;
			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
			err = EINVAL;
			goto done;
		}

		if (numvec != alloced) {
			ppt_teardown_msix(ppt);
			err = EINVAL;
			goto done;
		}
	}

	if (idx >= ppt->msix.num_msgs) {
		err = EINVAL;
		goto done;
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		int intr_cap, res;

		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
			err = ENXIO;
			goto done;
		}

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
		else
			res = ddi_intr_enable(ppt->msix.inth[idx]);

		if (res != DDI_SUCCESS) {
			ddi_intr_remove_handler(ppt->msix.inth[idx]);
			err = ENXIO;
			goto done;
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    msilimit) != DDI_SUCCESS) {
		*msilimit = -1;
	}
	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
	    msixlimit) != DDI_SUCCESS) {
		*msixlimit = -1;
	}

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_disable_msix(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_teardown_msix(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}