/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>
#include <sys/ppt_dev.h>
#include <sys/mkdev.h>
#include <sys/sysmacros.h>

#include "vmm_lapic.h"

#include "iommu.h"
#include "ppt.h"

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

struct pptintr_arg {
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

struct pptbar {
	uint64_t	base;
	uint64_t	size;
	uint_t		type;
	ddi_acc_handle_t io_handle;
	caddr_t		io_ptr;
	uint_t		ddireg;
};

struct pptdev {
	dev_info_t		*pptd_dip;
	list_node_t		pptd_node;
	ddi_acc_handle_t	pptd_cfg;
	struct pptbar		pptd_bars[PCI_BASE_NUM];
	struct vm		*vm;
	struct pptseg		mmio[MAX_MMIOSEGS];
	struct {
		int		num_msgs;	/* guest state */
		boolean_t	is_fixed;
		size_t		inth_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int		num_msgs;
		size_t		inth_sz;
		size_t		arg_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg *arg;
	} msix;
};


static major_t		ppt_major;
static void		*ppt_state;
static kmutex_t		pptdev_mtx;
static list_t		pptdev_list;

#define	PPT_MINOR_NAME	"ppt"

static ddi_device_acc_attr_t ppt_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

static int
ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	/* XXX: require extra privs? */
	return (0);
}

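/*
 * Convert a config-space BAR offset (PCI_CONF_BASE0..PCI_CONF_BASE5) into a
 * 0-based index into pptd_bars[], and check that a given offset actually
 * names a 32-bit-aligned BAR register within that range.
 */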
#define	BAR_TO_IDX(bar)	(((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
#define	BAR_VALID(b)	(			\
		(b) >= PCI_CONF_BASE0 &&	\
		(b) <= PCI_CONF_BASE5 &&	\
		((b) & (PCI_BAR_SZ_32-1)) == 0)

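/*
 * Service the ioctls used by userspace to read and write the device's PCI
 * config space and I/O-space BARs, and to query the location, size, and
 * type of each BAR.
 */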
static int
ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	struct pptdev *ppt;
	void *data = (void *)arg;

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
		return (ENOENT);
	}

	switch (cmd) {
	case PPT_CFG_READ: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
			break;
		case 2:
			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
			break;
		case 1:
			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_CFG_WRITE: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
			break;
		case 2:
			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
			break;
		case 1:
			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	case PPT_BAR_QUERY: {
		struct ppt_bar_query barg;
		struct pptbar *pbar;

		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		if (barg.pbq_baridx >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[barg.pbq_baridx];

		if (pbar->base == 0 || pbar->size == 0) {
			return (ENOENT);
		}
		barg.pbq_type = pbar->type;
		barg.pbq_base = pbar->base;
		barg.pbq_size = pbar->size;

		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_READ: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
			break;
		case 2:
			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
			break;
		case 1:
			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_WRITE: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 2:
			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 1:
			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}

	default:
		return (ENOTTY);
	}

	return (0);
}

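/*
 * Locate the BAR containing the MSI-X table, as indicated by the BIR field
 * of the capability's table-offset register. Returns the BAR index, or -1
 * if the device has no MSI-X capability.
 */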
static int
ppt_find_msix_table_bar(struct pptdev *ppt)
{
	uint16_t base;
	uint32_t off;

	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) !=
	    DDI_SUCCESS)
		return (-1);

	off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_TBL_OFFSET);

	if (off == PCI_EINVAL32)
		return (-1);

	return (off & PCI_MSIX_TBL_BIR_MASK);
}

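/*
 * devmap(9E) entry point: expose the BAR which holds the MSI-X table so
 * that userspace can mmap it and emulate guest accesses to the table.
 */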
static int
ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	minor_t minor;
	struct pptdev *ppt;
	int err, bar;
	uint_t ddireg;

	minor = getminor(dev);

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
		return (ENXIO);

#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
		return (ENXIO);
#endif

	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
		return (EINVAL);

	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
		return (EINVAL);

	ddireg = ppt->pptd_bars[bar].ddireg;

	if (ddireg == 0)
		return (EINVAL);

	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);

	if (err == DDI_SUCCESS)
		*maplen = len;

	return (err);
}

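/*
 * Tear down any I/O-space register mappings and clear all recorded BAR
 * state for the device.
 */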
static void
ppt_bar_wipe(struct pptdev *ppt)
{
	uint_t i;

	for (i = 0; i < PCI_BASE_NUM; i++) {
		struct pptbar *pbar = &ppt->pptd_bars[i];
		if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
			ddi_regs_map_free(&pbar->io_handle);
		}
	}
	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
}

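/*
 * Walk the device's "assigned-addresses" property to record the base, size,
 * and type of each BAR, mapping I/O-space BARs so they can be accessed via
 * the PPT_BAR_READ/PPT_BAR_WRITE ioctls.
 */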
static int
ppt_bar_crawl(struct pptdev *ppt)
{
	pci_regspec_t *regs;
	uint_t rcount, i;
	int err = 0, rlen;

	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
		return (EIO);
	}

	VERIFY3S(rlen, >, 0);
	rcount = rlen / sizeof (pci_regspec_t);
	for (i = 0; i < rcount; i++) {
		pci_regspec_t *reg = &regs[i];
		struct pptbar *pbar;
		uint_t bar, rnum;

		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
		bar = PCI_REG_REG_G(reg->pci_phys_hi);
		if (!BAR_VALID(bar)) {
			continue;
		}

		rnum = BAR_TO_IDX(bar);
		pbar = &ppt->pptd_bars[rnum];
		/* is this somehow already populated? */
		if (pbar->base != 0 || pbar->size != 0) {
			err = EEXIST;
			break;
		}

		/*
		 * Register 0 corresponds to the PCI config space.
		 * The registers which match the assigned-addresses list are
		 * offset by 1.
		 */
		pbar->ddireg = i + 1;

		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
		    (uint64_t)reg->pci_phys_low;
		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
		    (uint64_t)reg->pci_size_low;
		if (pbar->type == PCI_ADDR_IO) {
			err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
			    &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
			if (err != 0) {
				break;
			}
		}
	}
	kmem_free(regs, rlen);

	if (err != 0) {
		ppt_bar_wipe(ppt);
	}
	return (err);
}

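/*
 * Check that the host-physical range [base, base + size) lies entirely
 * within one of the device's memory BARs.
 */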
static boolean_t
ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
{
	const uint64_t map_end = base + size;

	/* Zero-length or overflow mappings are not valid */
	if (map_end <= base) {
		return (B_FALSE);
	}
	/* MMIO bounds should be page-aligned */
	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
		return (B_FALSE);
	}

	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
		const struct pptbar *bar = &ppt->pptd_bars[i];
		const uint64_t bar_end = bar->base + bar->size;

		/* Only memory BARs can be mapped */
		if (bar->type != PCI_ADDR_MEM32 &&
		    bar->type != PCI_ADDR_MEM64) {
			continue;
		}

		/* Does the mapping fit within this BAR? */
		if (base < bar->base || base >= bar_end ||
		    map_end < bar->base || map_end > bar_end) {
			continue;
		}

		/* This BAR satisfies the provided map */
		return (B_TRUE);
	}
	return (B_FALSE);
}

static boolean_t
ppt_toggle_bar(struct pptdev *ppt, boolean_t enable)
{
	/*
	 * Enable/disable bus mastering and BAR decoding based on the BAR
	 * configuration. Bhyve emulates the COMMAND register so we won't see
	 * the bits changing there.
	 */
	ddi_acc_handle_t hdl;
	uint16_t cmd;

	if (pci_config_setup(ppt->pptd_dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);

	if (enable) {
		cmd |= PCI_COMM_ME;

		for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
			const struct pptbar *bar = &ppt->pptd_bars[i];

			switch (bar->type) {
			case PCI_ADDR_MEM32:
			case PCI_ADDR_MEM64:
				cmd |= PCI_COMM_MAE;
				break;
			case PCI_ADDR_IO:
				cmd |= PCI_COMM_IO;
				break;
			}
		}
	} else {
		cmd &= ~(PCI_COMM_ME | PCI_COMM_MAE | PCI_COMM_IO);
	}

	pci_config_put16(hdl, PCI_CONF_COMM, cmd);
	pci_config_teardown(&hdl);

	return (B_TRUE);
}

static int
ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct pptdev *ppt = NULL;
	char name[PPT_MAXNAMELEN];
	int inst;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	inst = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
		goto fail;
	}
	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
	ppt->pptd_dip = dip;
	ddi_set_driver_private(dip, ppt);

	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
		goto fail;
	}
	if (ppt_bar_crawl(ppt) != 0) {
		goto fail;
	}
	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	ppt_toggle_bar(ppt, B_FALSE);

	mutex_enter(&pptdev_mtx);
	list_insert_tail(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	return (DDI_SUCCESS);

fail:
	if (ppt != NULL) {
		ddi_remove_minor_node(dip, NULL);
		if (ppt->pptd_cfg != NULL) {
			pci_config_teardown(&ppt->pptd_cfg);
		}
		ppt_bar_wipe(ppt);
		ddi_soft_state_free(ppt_state, inst);
	}
	return (DDI_FAILURE);
}

static int
ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct pptdev *ppt;
	int inst;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ppt = ddi_get_driver_private(dip);
	inst = ddi_get_instance(dip);

	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);

	mutex_enter(&pptdev_mtx);
	if (ppt->vm != NULL) {
		mutex_exit(&pptdev_mtx);
		return (DDI_FAILURE);
	}
	list_remove(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
	ppt_bar_wipe(ppt);
	pci_config_teardown(&ppt->pptd_cfg);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(ppt_state, inst);

	return (DDI_SUCCESS);
}

static int
ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int error = DDI_FAILURE;
	int inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO: {
		struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst);

		if (ppt != NULL) {
			*result = (void *)ppt->pptd_dip;
			error = DDI_SUCCESS;
		}
		break;
	}
	case DDI_INFO_DEVT2INSTANCE: {
		*result = (void *)(uintptr_t)inst;
		error = DDI_SUCCESS;
		break;
	}
	default:
		break;
	}
	return (error);
}

static struct cb_ops ppt_cb_ops = {
	ppt_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	ppt_ioctl,
	ppt_devmap,	/* devmap */
	NULL,		/* mmap */
	NULL,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,
	D_NEW | D_MP | D_64BIT | D_DEVMAP,
	CB_REV
};

static struct dev_ops ppt_ops = {
	DEVO_REV,
	0,
	ppt_ddi_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	ppt_ddi_attach,
	ppt_ddi_detach,
	nodev,		/* reset */
	&ppt_cb_ops,
	(struct bus_ops *)NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"bhyve pci pass-thru",
	&ppt_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&pptdev_list, sizeof (struct pptdev),
	    offsetof(struct pptdev, pptd_node));

	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
	if (error) {
		goto fail;
	}

	error = mod_install(&modlinkage);

	ppt_major = ddi_name_to_major("ppt");
fail:
	if (error) {
		ddi_soft_state_fini(&ppt_state);
	}
	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&modlinkage);
	if (error)
		return (error);
	ddi_soft_state_fini(&ppt_state);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

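/*
 * Poll the PCIe Device Status register until the Transactions Pending bit
 * clears, waiting at most max_delay_us microseconds. Returns B_TRUE if the
 * device went idle within the timeout.
 */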
static boolean_t
ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
{
	uint16_t cap_ptr, devsts;
	ddi_acc_handle_t hdl;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
		pci_config_teardown(&hdl);
		return (B_FALSE);
	}

	devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) {
		if (max_delay_us == 0) {
			pci_config_teardown(&hdl);
			return (B_FALSE);
		}

		/* Poll once every 100 milliseconds up to the timeout. */
		if (max_delay_us > 100000) {
			delay(drv_usectohz(100000));
			max_delay_us -= 100000;
		} else {
			delay(drv_usectohz(max_delay_us));
			max_delay_us = 0;
		}
		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	}

	pci_config_teardown(&hdl);
	return (B_TRUE);
}

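/*
 * Derive the worst-case Completion Timeout for the device, in microseconds,
 * from the range advertised in its PCIe capability. Falls back to a default
 * of 50ms when the device predates PCIe 2.0 or advertises no range.
 */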
static uint_t
ppt_max_completion_tmo_us(dev_info_t *dip)
{
	uint_t timo = 0;
	uint16_t cap_ptr;
	ddi_acc_handle_t hdl;
	uint_t timo_ranges[] = {	/* timeout ranges */
		50000,		/* 50ms */
		100,		/* 100us */
		10000,		/* 10ms */
		0,
		0,
		55000,		/* 55ms */
		210000,		/* 210ms */
		0,
		0,
		900000,		/* 900ms */
		3500000,	/* 3.5s */
		0,
		0,
		13000000,	/* 13s */
		64000000,	/* 64s */
		0
	};

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (50000); /* default 50ms */

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto out;

	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
		goto out;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
	    PCIE_DEVCTL2_COM_TO_RANGE_MASK) == 0)
		goto out;

	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
	    PCIE_DEVCAP2_COM_TO_RANGE_MASK];

out:
	if (timo == 0)
		timo = 50000; /* default 50ms */

	pci_config_teardown(&hdl);
	return (timo);
}

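/*
 * Perform a Function Level Reset (FLR) on the device. Unless 'force' is
 * set, the reset is abandoned if the device fails to go idle within its
 * completion timeout.
 */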
static boolean_t
ppt_flr(dev_info_t *dip, boolean_t force)
{
	uint16_t cap_ptr, ctl, cmd;
	ddi_acc_handle_t hdl;
	uint_t compl_delay = 0, max_delay_us;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto fail;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
	    == 0)
		goto fail;

	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);

	/*
	 * Disable busmastering to prevent generation of new transactions while
	 * waiting for the device to go idle. If the idle timeout fails, the
	 * command register is restored which will re-enable busmastering.
	 */
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
		if (!force) {
			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
			goto fail;
		}
		dev_err(dip, CE_WARN,
		    "?Resetting with transactions pending after %u us\n",
		    max_delay_us);

		/*
		 * Extend the post-FLR delay to cover the maximum Completion
		 * Timeout delay of anything in flight during the FLR delay.
		 * Enforce a minimum delay of at least 10ms.
		 */
		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
	}

	/* Initiate the reset. */
	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
	    ctl | PCIE_DEVCTL_INITIATE_FLR);

	/* Wait for at least 100ms */
	delay(drv_usectohz((100 + compl_delay) * 1000));

	pci_config_teardown(&hdl);
	return (B_TRUE);

fail:
	/*
	 * TODO: If the FLR fails for some reason, we should attempt a reset
	 * using the PCI power management facilities (if possible).
	 */
	pci_config_teardown(&hdl);
	return (B_FALSE);
}

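/*
 * Resolve a ppt device from a caller-supplied file descriptor and verify
 * that it is owned by the given VM (or unowned, when vm is NULL). On
 * success the file remains held; the caller must releasef() it.
 */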
static int
ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
{
	struct pptdev *ppt = NULL;
	file_t *fp;
	vattr_t va;
	int err = 0;

	ASSERT(MUTEX_HELD(&pptdev_mtx));

	if ((fp = getf(fd)) == NULL)
		return (EBADF);

	va.va_mask = AT_RDEV;
	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
	    getmajor(va.va_rdev) != ppt_major) {
		err = EBADF;
		goto fail;
	}

	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));

	if (ppt == NULL) {
		err = EBADF;
		goto fail;
	}

	if (ppt->vm != vm) {
		err = EBUSY;
		goto fail;
	}

	*pptp = ppt;
	return (0);

fail:
	releasef(fd);
	return (err);
}

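/*
 * Unmap and clear every guest MMIO segment recorded for this device.
 */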
static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof (struct pptseg));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msi.inth[i], 1);
		else
			ddi_intr_disable(ppt->msi.inth[i]);

		ddi_intr_remove_handler(ppt->msi.inth[i]);
		ddi_intr_free(ppt->msi.inth[i]);

		ppt->msi.inth[i] = NULL;
	}

	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
	ppt->msi.inth = NULL;
	ppt->msi.inth_sz = 0;
	ppt->msi.is_fixed = B_FALSE;

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
		else
			ddi_intr_disable(ppt->msix.inth[idx]);

		ddi_intr_remove_handler(ppt->msix.inth[idx]);
	}
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	uint_t i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.inth) {
		for (i = 0; i < ppt->msix.num_msgs; i++)
			ddi_intr_free(ppt->msix.inth[i]);
		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
		ppt->msix.inth = NULL;
		ppt->msix.inth_sz = 0;
		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
		ppt->msix.arg = NULL;
		ppt->msix.arg_sz = 0;
	}

	ppt->msix.num_msgs = 0;
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	uint_t num = 0;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			num++;
		}
	}
	mutex_exit(&pptdev_mtx);
	return (num);
}

boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	struct pptdev *ppt;

	/* XXX: this should probably be restructured to avoid the lock */
	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm != vm) {
			continue;
		}

		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
			struct pptseg *seg = &ppt->mmio[i];

			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
				mutex_exit(&pptdev_mtx);
				return (B_TRUE);
			}
		}
	}

	mutex_exit(&pptdev_mtx);
	return (B_FALSE);
}

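/*
 * Assign an unowned pass-through device to a VM: save its pristine config
 * state, reset it via FLR, move it from the host IOMMU domain into the
 * VM's domain, and enable BAR decoding for the guest.
 */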
int
ppt_assign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	/* Passing NULL requires the device to be unowned. */
	err = ppt_findf(NULL, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore the device state after reset and then perform another save
	 * so the "pristine" state can be restored when the device is removed
	 * from the guest.
	 */
	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}

	ppt_toggle_bar(ppt, B_TRUE);

	ppt->vm = vm;
	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	pf_set_passthru(ppt->pptd_dip, B_TRUE);

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

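/*
 * Force the device back to the D0 power state via its PCI power management
 * capability, if present.
 */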
static void
ppt_reset_pci_power_state(dev_info_t *dip)
{
	ddi_acc_handle_t cfg;
	uint16_t cap_ptr;

	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
		return;

	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
		uint16_t val;

		val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
		if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
			val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
			    val);
		}
	}

	pci_config_teardown(&cfg);
}

static void
ppt_do_unassign(struct pptdev *ppt)
{
	struct vm *vm = ppt->vm;

	ASSERT3P(vm, !=, NULL);
	ASSERT(MUTEX_HELD(&pptdev_mtx));

	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore from the state saved during device assignment.
	 * If the device power state has been altered, that must be remedied
	 * first, as it will reset register state during the transition.
	 */
	ppt_reset_pci_power_state(ppt->pptd_dip);
	(void) pci_restore_config_regs(ppt->pptd_dip);

	pf_set_passthru(ppt->pptd_dip, B_FALSE);

	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	ppt->vm = NULL;
}

int
ppt_unassign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_do_unassign(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

void
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			ppt_do_unassign(ppt);
		}
	}
	mutex_exit(&pptdev_mtx);
}

int
ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct pptdev *ppt;
	int err = 0;

	if ((len & PAGEOFFSET) != 0 || len == 0 || (gpa & PAGEOFFSET) != 0 ||
	    (hpa & PAGEOFFSET) != 0 || gpa + len < gpa || hpa + len < hpa) {
		return (EINVAL);
	}

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/*
	 * Ensure that the host-physical range of the requested mapping fits
	 * within one of the MMIO BARs of the device.
	 */
	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
		err = EINVAL;
		goto done;
	}

	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->len == 0) {
			err = vm_map_mmio(vm, gpa, len, hpa);
			if (err == 0) {
				seg->gpa = gpa;
				seg->len = len;
			}
			goto done;
		}
	}
	err = ENOSPC;

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct pptdev *ppt;
	int err = 0;
	uint_t i;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->gpa == gpa && seg->len == len) {
			err = vm_unmap_mmio(vm, seg->gpa, seg->len);
			if (err == 0) {
				seg->gpa = 0;
				seg->len = 0;
			}
			goto out;
		}
	}
	err = ENOENT;
out:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

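/*
 * Interrupt handler shared by all pass-through vectors: forward the
 * configured MSI address/data pair to the guest's local APIC.
 */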
static uint_t
pptintr(caddr_t arg, caddr_t unused)
{
	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
	struct pptdev *ppt = pptarg->pptdev;

	if (ppt->vm != NULL) {
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	} else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
}

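/*
 * Configure MSI (or legacy fixed) interrupts on behalf of the guest:
 * allocate 'numvec' vectors, hook each one up to pptintr(), and enable
 * them. A numvec of zero simply tears down any existing configuration.
 */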
int
ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
    int numvec)
{
	int i, msi_count, intr_type;
	struct pptdev *ppt;
	int err = 0;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0) {
		err = EBUSY;
		goto done;
	}

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0) {
		/* nothing more to do */
		goto done;
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    &msi_count) != DDI_SUCCESS) {
		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
		    &msi_count) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		intr_type = DDI_INTR_TYPE_FIXED;
		ppt->msi.is_fixed = B_TRUE;
	} else {
		intr_type = DDI_INTR_TYPE_MSI;
	}

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count) {
		err = EINVAL;
		goto done;
	}

	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
	    numvec, &msi_count, 0) != DDI_SUCCESS) {
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		err = EINVAL;
		goto done;
	}

	/* Verify that we got as many vectors as the guest requested */
	if (numvec != msi_count) {
		ppt_teardown_msi(ppt);
		err = EINVAL;
		goto done;
	}

	/* Set up & enable interrupt handler for each vector. */
	for (i = 0; i < numvec; i++) {
		int res, intr_cap = 0;

		ppt->msi.num_msgs = i + 1;
		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
			break;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
		else
			res = ddi_intr_enable(ppt->msi.inth[i]);

		if (res != DDI_SUCCESS)
			break;
	}
	if (i < numvec) {
		ppt_teardown_msi(ppt);
		err = ENXIO;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

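/*
 * Configure a single MSI-X vector on behalf of the guest. On first use all
 * of the device's MSI-X interrupts are allocated; thereafter the vector at
 * 'idx' is hooked up to pptintr() or torn down according to the mask bit
 * in 'vector_control'.
 */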
int
ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
    uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	int numvec, alloced;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0) {
		err = EBUSY;
		goto done;
	}

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		dev_info_t *dip = ppt->pptd_dip;

		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
		    &numvec) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		ppt->msix.num_msgs = numvec;

		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);

		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
		    numvec, &alloced, 0) != DDI_SUCCESS) {
			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
			ppt->msix.arg = NULL;
			ppt->msix.inth = NULL;
			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
			err = EINVAL;
			goto done;
		}

		if (numvec != alloced) {
			ppt_teardown_msix(ppt);
			err = EINVAL;
			goto done;
		}
	}

	if (idx >= ppt->msix.num_msgs) {
		err = EINVAL;
		goto done;
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		int intr_cap, res;

		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
			err = ENXIO;
			goto done;
		}

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
		else
			res = ddi_intr_enable(ppt->msix.inth[idx]);

		if (res != DDI_SUCCESS) {
			ddi_intr_remove_handler(ppt->msix.inth[idx]);
			err = ENXIO;
			goto done;
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    msilimit) != DDI_SUCCESS) {
		*msilimit = -1;
	}
	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
	    msixlimit) != DDI_SUCCESS) {
		*msixlimit = -1;
	}

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_disable_msix(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_teardown_msix(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}