xref: /freebsd/usr.sbin/bhyve/pci_passthru.c (revision 4d1e669cadff30b4cfda6eaf356b325dceac6357)
1366f6083SPeter Grehan /*-
2366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
3366f6083SPeter Grehan  * All rights reserved.
4366f6083SPeter Grehan  *
5366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
6366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
7366f6083SPeter Grehan  * are met:
8366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
9366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
10366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
11366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
12366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
13366f6083SPeter Grehan  *
14366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24366f6083SPeter Grehan  * SUCH DAMAGE.
25366f6083SPeter Grehan  *
26366f6083SPeter Grehan  * $FreeBSD$
27366f6083SPeter Grehan  */
28366f6083SPeter Grehan 
29366f6083SPeter Grehan #include <sys/cdefs.h>
30366f6083SPeter Grehan __FBSDID("$FreeBSD$");
31366f6083SPeter Grehan 
32366f6083SPeter Grehan #include <sys/param.h>
33366f6083SPeter Grehan #include <sys/types.h>
34366f6083SPeter Grehan #include <sys/pciio.h>
35366f6083SPeter Grehan #include <sys/ioctl.h>
36366f6083SPeter Grehan 
37366f6083SPeter Grehan #include <dev/io/iodev.h>
38366f6083SPeter Grehan #include <machine/iodev.h>
39366f6083SPeter Grehan 
40366f6083SPeter Grehan #include <stdio.h>
41366f6083SPeter Grehan #include <stdlib.h>
42366f6083SPeter Grehan #include <string.h>
43366f6083SPeter Grehan #include <errno.h>
44366f6083SPeter Grehan #include <fcntl.h>
45366f6083SPeter Grehan #include <unistd.h>
46366f6083SPeter Grehan 
47366f6083SPeter Grehan #include <machine/vmm.h>
48366f6083SPeter Grehan #include <vmmapi.h>
49366f6083SPeter Grehan #include "pci_emul.h"
50*4d1e669cSPeter Grehan #include "mem.h"
51cd942e0fSPeter Grehan #include "instruction_emul.h"
52366f6083SPeter Grehan 
/* Device nodes used to reach PCI config space and I/O ports on the host. */
#ifndef _PATH_DEVPCI
#define	_PATH_DEVPCI	"/dev/pci"
#endif

#ifndef	_PATH_DEVIO
#define	_PATH_DEVIO	"/dev/io"
#endif

/*
 * When defined, a MSI capability is crafted for devices that do not
 * advertise one natively.
 */
#define	LEGACY_SUPPORT	1

/*
 * Helpers for decoding the MSI-X capability:
 *  - the low three bits of the table/PBA offset registers select the BAR,
 *    the remaining bits are the offset within that BAR;
 *  - the low 11 bits of the message control word hold (table size - 1);
 *  - MSIX_CAPLEN is the number of config space bytes the capability spans.
 *
 * The masks are plain expressions (no trailing ';') so they can be used
 * anywhere inside a larger expression.
 */
#define MSIX_TABLE_BIR_MASK 7
#define MSIX_TABLE_OFFSET_MASK (~MSIX_TABLE_BIR_MASK)
#define MSIX_TABLE_COUNT(x) (((x) & 0x7FF) + 1)
#define MSIX_CAPLEN 12
67cd942e0fSPeter Grehan 
68366f6083SPeter Grehan static int pcifd = -1;
69366f6083SPeter Grehan static int iofd = -1;
70366f6083SPeter Grehan 
71366f6083SPeter Grehan struct passthru_softc {
72366f6083SPeter Grehan 	struct pci_devinst *psc_pi;
73366f6083SPeter Grehan 	struct pcibar psc_bar[PCI_BARMAX + 1];
74366f6083SPeter Grehan 	struct {
75366f6083SPeter Grehan 		int		capoff;
76366f6083SPeter Grehan 		int		msgctrl;
77366f6083SPeter Grehan 		int		emulated;
78366f6083SPeter Grehan 	} psc_msi;
79cd942e0fSPeter Grehan 	struct {
80cd942e0fSPeter Grehan 		int		capoff;
81cd942e0fSPeter Grehan 	} psc_msix;
82366f6083SPeter Grehan 	struct pcisel psc_sel;
83366f6083SPeter Grehan };
84366f6083SPeter Grehan 
85366f6083SPeter Grehan static int
86366f6083SPeter Grehan msi_caplen(int msgctrl)
87366f6083SPeter Grehan {
88366f6083SPeter Grehan 	int len;
89366f6083SPeter Grehan 
90366f6083SPeter Grehan 	len = 10;		/* minimum length of msi capability */
91366f6083SPeter Grehan 
92366f6083SPeter Grehan 	if (msgctrl & PCIM_MSICTRL_64BIT)
93366f6083SPeter Grehan 		len += 4;
94366f6083SPeter Grehan 
95366f6083SPeter Grehan #if 0
96366f6083SPeter Grehan 	/*
97366f6083SPeter Grehan 	 * Ignore the 'mask' and 'pending' bits in the MSI capability.
98366f6083SPeter Grehan 	 * We'll let the guest manipulate them directly.
99366f6083SPeter Grehan 	 */
100366f6083SPeter Grehan 	if (msgctrl & PCIM_MSICTRL_VECTOR)
101366f6083SPeter Grehan 		len += 10;
102366f6083SPeter Grehan #endif
103366f6083SPeter Grehan 
104366f6083SPeter Grehan 	return (len);
105366f6083SPeter Grehan }
106366f6083SPeter Grehan 
107366f6083SPeter Grehan static uint32_t
108366f6083SPeter Grehan read_config(const struct pcisel *sel, long reg, int width)
109366f6083SPeter Grehan {
110366f6083SPeter Grehan 	struct pci_io pi;
111366f6083SPeter Grehan 
112366f6083SPeter Grehan 	bzero(&pi, sizeof(pi));
113366f6083SPeter Grehan 	pi.pi_sel = *sel;
114366f6083SPeter Grehan 	pi.pi_reg = reg;
115366f6083SPeter Grehan 	pi.pi_width = width;
116366f6083SPeter Grehan 
117366f6083SPeter Grehan 	if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
118366f6083SPeter Grehan 		return (0);				/* XXX */
119366f6083SPeter Grehan 	else
120366f6083SPeter Grehan 		return (pi.pi_data);
121366f6083SPeter Grehan }
122366f6083SPeter Grehan 
123366f6083SPeter Grehan static void
124366f6083SPeter Grehan write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
125366f6083SPeter Grehan {
126366f6083SPeter Grehan 	struct pci_io pi;
127366f6083SPeter Grehan 
128366f6083SPeter Grehan 	bzero(&pi, sizeof(pi));
129366f6083SPeter Grehan 	pi.pi_sel = *sel;
130366f6083SPeter Grehan 	pi.pi_reg = reg;
131366f6083SPeter Grehan 	pi.pi_width = width;
132366f6083SPeter Grehan 	pi.pi_data = data;
133366f6083SPeter Grehan 
134366f6083SPeter Grehan 	(void)ioctl(pcifd, PCIOCWRITE, &pi);		/* XXX */
135366f6083SPeter Grehan }
136366f6083SPeter Grehan 
#ifdef LEGACY_SUPPORT
/*
 * Build a MSI capability for 'msgnum' messages that links to 'nextptr' and
 * store it, byte by byte, in the emulated config space of 'pi'.
 *
 * XXX
 * The capability is placed at the very end of the 256 byte config space
 * (its size rounded up to a multiple of 4).  This is wrong because it could
 * shadow something useful to the device.
 *
 * Returns the config space offset at which the capability was stored.
 */
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
{
	struct msicap cap;
	const u_char *bytes;
	int base, off;

	pci_populate_msicap(&cap, msgnum, nextptr);

	base = 256 - roundup(sizeof(cap), 4);
	bytes = (const u_char *)&cap;

	for (off = 0; off < sizeof(cap); off++)
		pci_set_cfgdata8(pi, base + off, bytes[off]);

	return (base);
}
#endif	/* LEGACY_SUPPORT */
161366f6083SPeter Grehan 
162366f6083SPeter Grehan static int
163366f6083SPeter Grehan cfginitmsi(struct passthru_softc *sc)
164366f6083SPeter Grehan {
165cd942e0fSPeter Grehan 	int ptr, capptr, cap, sts, caplen;
166366f6083SPeter Grehan 	uint32_t u32;
167366f6083SPeter Grehan 	struct pcisel sel;
168366f6083SPeter Grehan 	struct pci_devinst *pi;
169cd942e0fSPeter Grehan 	struct msixcap msixcap;
170cd942e0fSPeter Grehan 	uint32_t *msixcap_ptr;
171366f6083SPeter Grehan 
172366f6083SPeter Grehan 	pi = sc->psc_pi;
173366f6083SPeter Grehan 	sel = sc->psc_sel;
174366f6083SPeter Grehan 
175366f6083SPeter Grehan 	/*
176366f6083SPeter Grehan 	 * Parse the capabilities and cache the location of the MSI
177cd942e0fSPeter Grehan 	 * and MSI-X capabilities.
178366f6083SPeter Grehan 	 */
179366f6083SPeter Grehan 	sts = read_config(&sel, PCIR_STATUS, 2);
180366f6083SPeter Grehan 	if (sts & PCIM_STATUS_CAPPRESENT) {
181366f6083SPeter Grehan 		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
182366f6083SPeter Grehan 		while (ptr != 0 && ptr != 0xff) {
183366f6083SPeter Grehan 			cap = read_config(&sel, ptr + PCICAP_ID, 1);
184366f6083SPeter Grehan 			if (cap == PCIY_MSI) {
185366f6083SPeter Grehan 				/*
186366f6083SPeter Grehan 				 * Copy the MSI capability into the config
187366f6083SPeter Grehan 				 * space of the emulated pci device
188366f6083SPeter Grehan 				 */
189366f6083SPeter Grehan 				sc->psc_msi.capoff = ptr;
190366f6083SPeter Grehan 				sc->psc_msi.msgctrl = read_config(&sel,
191366f6083SPeter Grehan 								  ptr + 2, 2);
192366f6083SPeter Grehan 				sc->psc_msi.emulated = 0;
193366f6083SPeter Grehan 				caplen = msi_caplen(sc->psc_msi.msgctrl);
194cd942e0fSPeter Grehan 				capptr = ptr;
195366f6083SPeter Grehan 				while (caplen > 0) {
196cd942e0fSPeter Grehan 					u32 = read_config(&sel, capptr, 4);
197cd942e0fSPeter Grehan 					pci_set_cfgdata32(pi, capptr, u32);
198366f6083SPeter Grehan 					caplen -= 4;
199cd942e0fSPeter Grehan 					capptr += 4;
200366f6083SPeter Grehan 				}
201cd942e0fSPeter Grehan 			} else if (cap == PCIY_MSIX) {
202cd942e0fSPeter Grehan 				/*
203cd942e0fSPeter Grehan 				 * Copy the MSI-X capability
204cd942e0fSPeter Grehan 				 */
205cd942e0fSPeter Grehan 				sc->psc_msix.capoff = ptr;
206cd942e0fSPeter Grehan 				caplen = 12;
207cd942e0fSPeter Grehan 				msixcap_ptr = (uint32_t*) &msixcap;
208cd942e0fSPeter Grehan 				capptr = ptr;
209cd942e0fSPeter Grehan 				while (caplen > 0) {
210cd942e0fSPeter Grehan 					u32 = read_config(&sel, capptr, 4);
211cd942e0fSPeter Grehan 					*msixcap_ptr = u32;
212cd942e0fSPeter Grehan 					pci_set_cfgdata32(pi, capptr, u32);
213cd942e0fSPeter Grehan 					caplen -= 4;
214cd942e0fSPeter Grehan 					capptr += 4;
215cd942e0fSPeter Grehan 					msixcap_ptr++;
216cd942e0fSPeter Grehan 				}
217366f6083SPeter Grehan 			}
218366f6083SPeter Grehan 			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
219366f6083SPeter Grehan 		}
220366f6083SPeter Grehan 	}
221366f6083SPeter Grehan 
222*4d1e669cSPeter Grehan 	if (sc->psc_msix.capoff != 0) {
223*4d1e669cSPeter Grehan 		pi->pi_msix.pba_bar =
224*4d1e669cSPeter Grehan 		    msixcap.pba_offset & MSIX_TABLE_BIR_MASK;
225*4d1e669cSPeter Grehan 		pi->pi_msix.pba_offset =
226*4d1e669cSPeter Grehan 		    msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK;
227*4d1e669cSPeter Grehan 		pi->pi_msix.table_bar =
228*4d1e669cSPeter Grehan 		    msixcap.table_offset & MSIX_TABLE_BIR_MASK;
229*4d1e669cSPeter Grehan 		pi->pi_msix.table_offset =
230*4d1e669cSPeter Grehan 		    msixcap.table_offset & MSIX_TABLE_OFFSET_MASK;
231cd942e0fSPeter Grehan 		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
232*4d1e669cSPeter Grehan 	}
233cd942e0fSPeter Grehan 
234366f6083SPeter Grehan #ifdef LEGACY_SUPPORT
235366f6083SPeter Grehan 	/*
236366f6083SPeter Grehan 	 * If the passthrough device does not support MSI then craft a
237366f6083SPeter Grehan 	 * MSI capability for it. We link the new MSI capability at the
238366f6083SPeter Grehan 	 * head of the list of capabilities.
239366f6083SPeter Grehan 	 */
240366f6083SPeter Grehan 	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
241366f6083SPeter Grehan 		int origptr, msiptr;
242366f6083SPeter Grehan 		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
243366f6083SPeter Grehan 		msiptr = passthru_add_msicap(pi, 1, origptr);
244366f6083SPeter Grehan 		sc->psc_msi.capoff = msiptr;
245366f6083SPeter Grehan 		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
246366f6083SPeter Grehan 		sc->psc_msi.emulated = 1;
247366f6083SPeter Grehan 		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
248366f6083SPeter Grehan 	}
249366f6083SPeter Grehan #endif
250366f6083SPeter Grehan 
251cd942e0fSPeter Grehan 	/* Make sure one of the capabilities is present */
252cd942e0fSPeter Grehan 	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
253366f6083SPeter Grehan 		return (-1);
254366f6083SPeter Grehan 	else
255366f6083SPeter Grehan 		return (0);
256366f6083SPeter Grehan }
257366f6083SPeter Grehan 
258*4d1e669cSPeter Grehan static uint64_t
259*4d1e669cSPeter Grehan msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
260cd942e0fSPeter Grehan {
261cd942e0fSPeter Grehan 	struct pci_devinst *pi;
262*4d1e669cSPeter Grehan 	struct msix_table_entry *entry;
263cd942e0fSPeter Grehan 	uint8_t *src8;
264cd942e0fSPeter Grehan 	uint16_t *src16;
265cd942e0fSPeter Grehan 	uint32_t *src32;
266cd942e0fSPeter Grehan 	uint64_t *src64;
267*4d1e669cSPeter Grehan 	uint64_t data;
268*4d1e669cSPeter Grehan 	size_t entry_offset;
269*4d1e669cSPeter Grehan 	int index;
270cd942e0fSPeter Grehan 
271cd942e0fSPeter Grehan 	pi = sc->psc_pi;
272*4d1e669cSPeter Grehan 	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
273cd942e0fSPeter Grehan 	index = offset / MSIX_TABLE_ENTRY_SIZE;
274cd942e0fSPeter Grehan 	entry = &pi->pi_msix.table[index];
275cd942e0fSPeter Grehan 
276cd942e0fSPeter Grehan 	switch(size) {
277cd942e0fSPeter Grehan 	case 1:
278cd942e0fSPeter Grehan 		src8 = (uint8_t *)((void *)entry + entry_offset);
279*4d1e669cSPeter Grehan 		data = *src8;
280cd942e0fSPeter Grehan 		break;
281cd942e0fSPeter Grehan 	case 2:
282cd942e0fSPeter Grehan 		src16 = (uint16_t *)((void *)entry + entry_offset);
283*4d1e669cSPeter Grehan 		data = *src16;
284cd942e0fSPeter Grehan 		break;
285cd942e0fSPeter Grehan 	case 4:
286cd942e0fSPeter Grehan 		src32 = (uint32_t *)((void *)entry + entry_offset);
287*4d1e669cSPeter Grehan 		data = *src32;
288cd942e0fSPeter Grehan 		break;
289cd942e0fSPeter Grehan 	case 8:
290cd942e0fSPeter Grehan 		src64 = (uint64_t *)((void *)entry + entry_offset);
291*4d1e669cSPeter Grehan 		data = *src64;
292cd942e0fSPeter Grehan 		break;
293cd942e0fSPeter Grehan 	default:
294cd942e0fSPeter Grehan 		return (-1);
295cd942e0fSPeter Grehan 	}
296cd942e0fSPeter Grehan 
297*4d1e669cSPeter Grehan 	return (data);
298cd942e0fSPeter Grehan }
299cd942e0fSPeter Grehan 
300*4d1e669cSPeter Grehan static void
301*4d1e669cSPeter Grehan msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
302*4d1e669cSPeter Grehan 		 uint64_t offset, int size, uint64_t data)
303cd942e0fSPeter Grehan {
304cd942e0fSPeter Grehan 	struct pci_devinst *pi;
305cd942e0fSPeter Grehan 	struct msix_table_entry *entry;
306*4d1e669cSPeter Grehan 	uint32_t *dest;
307*4d1e669cSPeter Grehan 	size_t entry_offset;
308cd942e0fSPeter Grehan 	uint32_t vector_control;
309*4d1e669cSPeter Grehan 	int error, index;
310cd942e0fSPeter Grehan 
311cd942e0fSPeter Grehan 	pi = sc->psc_pi;
312*4d1e669cSPeter Grehan 	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
313cd942e0fSPeter Grehan 	index = offset / MSIX_TABLE_ENTRY_SIZE;
314cd942e0fSPeter Grehan 	entry = &pi->pi_msix.table[index];
315cd942e0fSPeter Grehan 
316cd942e0fSPeter Grehan 	/* Only 4 byte naturally-aligned writes are supported */
317*4d1e669cSPeter Grehan 	assert(size == 4);
318*4d1e669cSPeter Grehan 	assert(entry_offset % 4 == 0);
319*4d1e669cSPeter Grehan 
320cd942e0fSPeter Grehan 	vector_control = entry->vector_control;
321cd942e0fSPeter Grehan 	dest = (uint32_t *)((void *)entry + entry_offset);
322cd942e0fSPeter Grehan 	*dest = data;
323cd942e0fSPeter Grehan 	/* If MSI-X hasn't been enabled, do nothing */
324cd942e0fSPeter Grehan 	if (pi->pi_msix.enabled) {
325cd942e0fSPeter Grehan 		/* If the entry is masked, don't set it up */
326cd942e0fSPeter Grehan 		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
327cd942e0fSPeter Grehan 		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
328*4d1e669cSPeter Grehan 			error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
329cd942e0fSPeter Grehan 					      sc->psc_sel.pc_dev,
330cd942e0fSPeter Grehan 					      sc->psc_sel.pc_func,
331cd942e0fSPeter Grehan 					      index, entry->msg_data,
332cd942e0fSPeter Grehan 					      entry->vector_control,
333cd942e0fSPeter Grehan 					      entry->addr);
334cd942e0fSPeter Grehan 		}
335cd942e0fSPeter Grehan 	}
336cd942e0fSPeter Grehan }
337cd942e0fSPeter Grehan 
338cd942e0fSPeter Grehan static int
339cd942e0fSPeter Grehan init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
340cd942e0fSPeter Grehan {
341cd942e0fSPeter Grehan 	int idx;
342cd942e0fSPeter Grehan 	size_t table_size;
343cd942e0fSPeter Grehan 	vm_paddr_t start;
344cd942e0fSPeter Grehan 	size_t len;
345cd942e0fSPeter Grehan 	struct pci_devinst *pi = sc->psc_pi;
346cd942e0fSPeter Grehan 
347cd942e0fSPeter Grehan 	/*
348cd942e0fSPeter Grehan 	 * If the MSI-X table BAR maps memory intended for
349cd942e0fSPeter Grehan 	 * other uses, it is at least assured that the table
350cd942e0fSPeter Grehan 	 * either resides in its own page within the region,
351cd942e0fSPeter Grehan 	 * or it resides in a page shared with only the PBA.
352cd942e0fSPeter Grehan 	 */
353cd942e0fSPeter Grehan 	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar &&
354cd942e0fSPeter Grehan 	    ((pi->pi_msix.pba_offset - pi->pi_msix.table_offset) < 4096)) {
355cd942e0fSPeter Grehan 		/* Need to also emulate the PBA, not supported yet */
356cd942e0fSPeter Grehan 		printf("Unsupported MSI-X table and PBA in same page\n");
357cd942e0fSPeter Grehan 		return (-1);
358cd942e0fSPeter Grehan 	}
359*4d1e669cSPeter Grehan 
360cd942e0fSPeter Grehan 	/*
361cd942e0fSPeter Grehan 	 * May need to split the BAR into 3 regions:
362cd942e0fSPeter Grehan 	 * Before the MSI-X table, the MSI-X table, and after it
363cd942e0fSPeter Grehan 	 * XXX for now, assume that the table is not in the middle
364cd942e0fSPeter Grehan 	 */
365cd942e0fSPeter Grehan 	table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
366cd942e0fSPeter Grehan 	pi->pi_msix.table_size = table_size;
367cd942e0fSPeter Grehan 	idx = pi->pi_msix.table_bar;
368cd942e0fSPeter Grehan 
369cd942e0fSPeter Grehan 	/* Round up to page size */
370cd942e0fSPeter Grehan 	table_size = (table_size + 0x1000) & ~0xFFF;
371cd942e0fSPeter Grehan 	if (pi->pi_msix.table_offset == 0) {
372cd942e0fSPeter Grehan 		/* Map everything after the MSI-X table */
373cd942e0fSPeter Grehan 		start = pi->pi_bar[idx].addr + table_size;
374cd942e0fSPeter Grehan 		len = pi->pi_bar[idx].size - table_size;
375cd942e0fSPeter Grehan 	} else {
376cd942e0fSPeter Grehan                 /* Map everything before the MSI-X table */
377cd942e0fSPeter Grehan 		start = pi->pi_bar[idx].addr;
378cd942e0fSPeter Grehan 		len = pi->pi_msix.table_offset;
379cd942e0fSPeter Grehan 	}
380*4d1e669cSPeter Grehan 	return (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
381cd942e0fSPeter Grehan 				   sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
382*4d1e669cSPeter Grehan 				   start, len, base + table_size));
383cd942e0fSPeter Grehan }
384cd942e0fSPeter Grehan 
385cd942e0fSPeter Grehan static int
386366f6083SPeter Grehan cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
387366f6083SPeter Grehan {
388366f6083SPeter Grehan 	int i, error;
389366f6083SPeter Grehan 	struct pci_devinst *pi;
390366f6083SPeter Grehan 	struct pci_bar_io bar;
391366f6083SPeter Grehan 	enum pcibar_type bartype;
392366f6083SPeter Grehan 	uint64_t base;
393366f6083SPeter Grehan 
394366f6083SPeter Grehan 	pi = sc->psc_pi;
395366f6083SPeter Grehan 
396366f6083SPeter Grehan 	/*
397366f6083SPeter Grehan 	 * Initialize BAR registers
398366f6083SPeter Grehan 	 */
399366f6083SPeter Grehan 	for (i = 0; i <= PCI_BARMAX; i++) {
400366f6083SPeter Grehan 		bzero(&bar, sizeof(bar));
401366f6083SPeter Grehan 		bar.pbi_sel = sc->psc_sel;
402366f6083SPeter Grehan 		bar.pbi_reg = PCIR_BAR(i);
403366f6083SPeter Grehan 
404366f6083SPeter Grehan 		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
405366f6083SPeter Grehan 			continue;
406366f6083SPeter Grehan 
407366f6083SPeter Grehan 		if (PCI_BAR_IO(bar.pbi_base)) {
408366f6083SPeter Grehan 			bartype = PCIBAR_IO;
409366f6083SPeter Grehan 			base = bar.pbi_base & PCIM_BAR_IO_BASE;
410366f6083SPeter Grehan 		} else {
411366f6083SPeter Grehan 			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
412366f6083SPeter Grehan 			case PCIM_BAR_MEM_64:
413366f6083SPeter Grehan 				bartype = PCIBAR_MEM64;
414366f6083SPeter Grehan 				break;
415366f6083SPeter Grehan 			default:
416366f6083SPeter Grehan 				bartype = PCIBAR_MEM32;
417366f6083SPeter Grehan 				break;
418366f6083SPeter Grehan 			}
419366f6083SPeter Grehan 			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
420366f6083SPeter Grehan 		}
421366f6083SPeter Grehan 
422366f6083SPeter Grehan 		/* Cache information about the "real" BAR */
423366f6083SPeter Grehan 		sc->psc_bar[i].type = bartype;
424366f6083SPeter Grehan 		sc->psc_bar[i].size = bar.pbi_length;
425366f6083SPeter Grehan 		sc->psc_bar[i].addr = base;
426366f6083SPeter Grehan 
427366f6083SPeter Grehan 		/* Allocate the BAR in the guest I/O or MMIO space */
428*4d1e669cSPeter Grehan 		error = pci_emul_alloc_pbar(pi, i, base, bartype,
429366f6083SPeter Grehan 					    bar.pbi_length);
430366f6083SPeter Grehan 		if (error)
431366f6083SPeter Grehan 			return (-1);
432366f6083SPeter Grehan 
433cd942e0fSPeter Grehan 		/* The MSI-X table needs special handling */
434cd942e0fSPeter Grehan 		if (i == pi->pi_msix.table_bar) {
435cd942e0fSPeter Grehan 			error = init_msix_table(ctx, sc, base);
436cd942e0fSPeter Grehan 			if (error)
437cd942e0fSPeter Grehan 				return (-1);
438cd942e0fSPeter Grehan 		} else if (bartype != PCIBAR_IO) {
439cd942e0fSPeter Grehan 			/* Map the physical MMIO space in the guest MMIO space */
440366f6083SPeter Grehan 			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
441366f6083SPeter Grehan 				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
442366f6083SPeter Grehan 				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
443366f6083SPeter Grehan 			if (error)
444366f6083SPeter Grehan 				return (-1);
445366f6083SPeter Grehan 		}
446366f6083SPeter Grehan 
447366f6083SPeter Grehan 		/*
448366f6083SPeter Grehan 		 * 64-bit BAR takes up two slots so skip the next one.
449366f6083SPeter Grehan 		 */
450366f6083SPeter Grehan 		if (bartype == PCIBAR_MEM64) {
451366f6083SPeter Grehan 			i++;
452366f6083SPeter Grehan 			assert(i <= PCI_BARMAX);
453366f6083SPeter Grehan 			sc->psc_bar[i].type = PCIBAR_MEMHI64;
454366f6083SPeter Grehan 		}
455366f6083SPeter Grehan 	}
456366f6083SPeter Grehan 	return (0);
457366f6083SPeter Grehan }
458366f6083SPeter Grehan 
459366f6083SPeter Grehan static int
460366f6083SPeter Grehan cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
461366f6083SPeter Grehan {
462366f6083SPeter Grehan 	int error;
463366f6083SPeter Grehan 	struct passthru_softc *sc;
464366f6083SPeter Grehan 
465366f6083SPeter Grehan 	error = 1;
466366f6083SPeter Grehan 	sc = pi->pi_arg;
467366f6083SPeter Grehan 
468366f6083SPeter Grehan 	bzero(&sc->psc_sel, sizeof(struct pcisel));
469366f6083SPeter Grehan 	sc->psc_sel.pc_bus = bus;
470366f6083SPeter Grehan 	sc->psc_sel.pc_dev = slot;
471366f6083SPeter Grehan 	sc->psc_sel.pc_func = func;
472366f6083SPeter Grehan 
473cd942e0fSPeter Grehan 	if (cfginitmsi(sc) != 0)
474cd942e0fSPeter Grehan 		goto done;
475cd942e0fSPeter Grehan 
476366f6083SPeter Grehan 	if (cfginitbar(ctx, sc) != 0)
477366f6083SPeter Grehan 		goto done;
478366f6083SPeter Grehan 
479366f6083SPeter Grehan 	error = 0;				/* success */
480366f6083SPeter Grehan done:
481366f6083SPeter Grehan 	return (error);
482366f6083SPeter Grehan }
483366f6083SPeter Grehan 
484366f6083SPeter Grehan static int
485366f6083SPeter Grehan passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
486366f6083SPeter Grehan {
487366f6083SPeter Grehan 	int bus, slot, func, error;
488366f6083SPeter Grehan 	struct passthru_softc *sc;
489366f6083SPeter Grehan 
490366f6083SPeter Grehan 	sc = NULL;
491366f6083SPeter Grehan 	error = 1;
492366f6083SPeter Grehan 
493366f6083SPeter Grehan 	if (pcifd < 0) {
494366f6083SPeter Grehan 		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
495366f6083SPeter Grehan 		if (pcifd < 0)
496366f6083SPeter Grehan 			goto done;
497366f6083SPeter Grehan 	}
498366f6083SPeter Grehan 
499366f6083SPeter Grehan 	if (iofd < 0) {
500366f6083SPeter Grehan 		iofd = open(_PATH_DEVIO, O_RDWR, 0);
501366f6083SPeter Grehan 		if (iofd < 0)
502366f6083SPeter Grehan 			goto done;
503366f6083SPeter Grehan 	}
504366f6083SPeter Grehan 
505*4d1e669cSPeter Grehan 	if (opts == NULL ||
506*4d1e669cSPeter Grehan 	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
507366f6083SPeter Grehan 		goto done;
508366f6083SPeter Grehan 
509366f6083SPeter Grehan 	if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
510366f6083SPeter Grehan 		goto done;
511366f6083SPeter Grehan 
512366f6083SPeter Grehan 	sc = malloc(sizeof(struct passthru_softc));
513366f6083SPeter Grehan 	memset(sc, 0, sizeof(struct passthru_softc));
514366f6083SPeter Grehan 
515366f6083SPeter Grehan 	pi->pi_arg = sc;
516366f6083SPeter Grehan 	sc->psc_pi = pi;
517366f6083SPeter Grehan 
518366f6083SPeter Grehan 	/* initialize config space */
519*4d1e669cSPeter Grehan 	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
520366f6083SPeter Grehan 		goto done;
521366f6083SPeter Grehan 
522366f6083SPeter Grehan 	error = 0;		/* success */
523366f6083SPeter Grehan done:
524366f6083SPeter Grehan 	if (error) {
525366f6083SPeter Grehan 		free(sc);
526366f6083SPeter Grehan 		vm_unassign_pptdev(ctx, bus, slot, func);
527366f6083SPeter Grehan 	}
528366f6083SPeter Grehan 	return (error);
529366f6083SPeter Grehan }
530366f6083SPeter Grehan 
531366f6083SPeter Grehan static int
532366f6083SPeter Grehan bar_access(int coff)
533366f6083SPeter Grehan {
534366f6083SPeter Grehan 	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
535366f6083SPeter Grehan 		return (1);
536366f6083SPeter Grehan 	else
537366f6083SPeter Grehan 		return (0);
538366f6083SPeter Grehan }
539366f6083SPeter Grehan 
540366f6083SPeter Grehan static int
541366f6083SPeter Grehan msicap_access(struct passthru_softc *sc, int coff)
542366f6083SPeter Grehan {
543366f6083SPeter Grehan 	int caplen;
544366f6083SPeter Grehan 
545366f6083SPeter Grehan 	if (sc->psc_msi.capoff == 0)
546366f6083SPeter Grehan 		return (0);
547366f6083SPeter Grehan 
548366f6083SPeter Grehan 	caplen = msi_caplen(sc->psc_msi.msgctrl);
549366f6083SPeter Grehan 
550366f6083SPeter Grehan 	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
551366f6083SPeter Grehan 		return (1);
552366f6083SPeter Grehan 	else
553366f6083SPeter Grehan 		return (0);
554366f6083SPeter Grehan }
555366f6083SPeter Grehan 
556366f6083SPeter Grehan static int
557cd942e0fSPeter Grehan msixcap_access(struct passthru_softc *sc, int coff)
558cd942e0fSPeter Grehan {
559cd942e0fSPeter Grehan 	if (sc->psc_msix.capoff == 0)
560cd942e0fSPeter Grehan 		return (0);
561cd942e0fSPeter Grehan 
562cd942e0fSPeter Grehan 	return (coff >= sc->psc_msix.capoff &&
563cd942e0fSPeter Grehan 	        coff < sc->psc_msix.capoff + MSIX_CAPLEN);
564cd942e0fSPeter Grehan }
565cd942e0fSPeter Grehan 
566cd942e0fSPeter Grehan static int
567*4d1e669cSPeter Grehan passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
568*4d1e669cSPeter Grehan 		 int coff, int bytes, uint32_t *rv)
569366f6083SPeter Grehan {
570366f6083SPeter Grehan 	struct passthru_softc *sc;
571366f6083SPeter Grehan 
572366f6083SPeter Grehan 	sc = pi->pi_arg;
573366f6083SPeter Grehan 
574366f6083SPeter Grehan 	/*
575366f6083SPeter Grehan 	 * PCI BARs and MSI capability is emulated.
576366f6083SPeter Grehan 	 */
577366f6083SPeter Grehan 	if (bar_access(coff) || msicap_access(sc, coff))
578366f6083SPeter Grehan 		return (-1);
579366f6083SPeter Grehan 
580366f6083SPeter Grehan #ifdef LEGACY_SUPPORT
581366f6083SPeter Grehan 	/*
582366f6083SPeter Grehan 	 * Emulate PCIR_CAP_PTR if this device does not support MSI capability
583366f6083SPeter Grehan 	 * natively.
584366f6083SPeter Grehan 	 */
585366f6083SPeter Grehan 	if (sc->psc_msi.emulated) {
586366f6083SPeter Grehan 		if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
587366f6083SPeter Grehan 			return (-1);
588366f6083SPeter Grehan 	}
589366f6083SPeter Grehan #endif
590366f6083SPeter Grehan 
591366f6083SPeter Grehan 	/* Everything else just read from the device's config space */
592366f6083SPeter Grehan 	*rv = read_config(&sc->psc_sel, coff, bytes);
593366f6083SPeter Grehan 
594366f6083SPeter Grehan 	return (0);
595366f6083SPeter Grehan }
596366f6083SPeter Grehan 
597366f6083SPeter Grehan static int
598*4d1e669cSPeter Grehan passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
599*4d1e669cSPeter Grehan 		  int coff, int bytes, uint32_t val)
600366f6083SPeter Grehan {
601cd942e0fSPeter Grehan 	int error, msix_table_entries, i;
602366f6083SPeter Grehan 	struct passthru_softc *sc;
603366f6083SPeter Grehan 
604366f6083SPeter Grehan 	sc = pi->pi_arg;
605366f6083SPeter Grehan 
606366f6083SPeter Grehan 	/*
607366f6083SPeter Grehan 	 * PCI BARs are emulated
608366f6083SPeter Grehan 	 */
609366f6083SPeter Grehan 	if (bar_access(coff))
610366f6083SPeter Grehan 		return (-1);
611366f6083SPeter Grehan 
612366f6083SPeter Grehan 	/*
613366f6083SPeter Grehan 	 * MSI capability is emulated
614366f6083SPeter Grehan 	 */
615366f6083SPeter Grehan 	if (msicap_access(sc, coff)) {
616366f6083SPeter Grehan 		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
617366f6083SPeter Grehan 
618366f6083SPeter Grehan 		error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
619366f6083SPeter Grehan 			sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
620366f6083SPeter Grehan 			pi->pi_msi.vector, pi->pi_msi.msgnum);
621366f6083SPeter Grehan 		if (error != 0) {
622366f6083SPeter Grehan 			printf("vm_setup_msi returned error %d\r\n", errno);
623366f6083SPeter Grehan 			exit(1);
624366f6083SPeter Grehan 		}
625366f6083SPeter Grehan 		return (0);
626366f6083SPeter Grehan 	}
627366f6083SPeter Grehan 
628cd942e0fSPeter Grehan 	if (msixcap_access(sc, coff)) {
629cd942e0fSPeter Grehan 		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
630cd942e0fSPeter Grehan 		if (pi->pi_msix.enabled) {
631cd942e0fSPeter Grehan 			msix_table_entries = pi->pi_msix.table_count;
632cd942e0fSPeter Grehan 			for (i = 0; i < msix_table_entries; i++) {
633cd942e0fSPeter Grehan 				error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
634cd942e0fSPeter Grehan 						      sc->psc_sel.pc_dev,
635cd942e0fSPeter Grehan 						      sc->psc_sel.pc_func, i,
636cd942e0fSPeter Grehan 						      pi->pi_msix.table[i].msg_data,
637cd942e0fSPeter Grehan 						      pi->pi_msix.table[i].vector_control,
638cd942e0fSPeter Grehan 						      pi->pi_msix.table[i].addr);
639cd942e0fSPeter Grehan 
640cd942e0fSPeter Grehan 				if (error) {
641cd942e0fSPeter Grehan 					printf("vm_setup_msix returned error %d\r\n", errno);
642cd942e0fSPeter Grehan 					exit(1);
643cd942e0fSPeter Grehan 				}
644cd942e0fSPeter Grehan 			}
645cd942e0fSPeter Grehan 		}
646cd942e0fSPeter Grehan 		return (0);
647cd942e0fSPeter Grehan 	}
648cd942e0fSPeter Grehan 
649366f6083SPeter Grehan #ifdef LEGACY_SUPPORT
650366f6083SPeter Grehan 	/*
651366f6083SPeter Grehan 	 * If this device does not support MSI natively then we cannot let
652366f6083SPeter Grehan 	 * the guest disable legacy interrupts from the device. It is the
653366f6083SPeter Grehan 	 * legacy interrupt that is triggering the virtual MSI to the guest.
654366f6083SPeter Grehan 	 */
655366f6083SPeter Grehan 	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
656366f6083SPeter Grehan 		if (coff == PCIR_COMMAND && bytes == 2)
657366f6083SPeter Grehan 			val &= ~PCIM_CMD_INTxDIS;
658366f6083SPeter Grehan 	}
659366f6083SPeter Grehan #endif
660366f6083SPeter Grehan 
661366f6083SPeter Grehan 	write_config(&sc->psc_sel, coff, bytes, val);
662366f6083SPeter Grehan 
663366f6083SPeter Grehan 	return (0);
664366f6083SPeter Grehan }
665366f6083SPeter Grehan 
666366f6083SPeter Grehan static void
667*4d1e669cSPeter Grehan passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
668*4d1e669cSPeter Grehan 	       uint64_t offset, int size, uint64_t value)
669366f6083SPeter Grehan {
670366f6083SPeter Grehan 	struct passthru_softc *sc;
671366f6083SPeter Grehan 	struct iodev_pio_req pio;
672366f6083SPeter Grehan 
673366f6083SPeter Grehan 	sc = pi->pi_arg;
674366f6083SPeter Grehan 
675*4d1e669cSPeter Grehan 	if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) {
676*4d1e669cSPeter Grehan 		msix_table_write(ctx, vcpu, sc, offset, size, value);
677*4d1e669cSPeter Grehan 	} else {
678*4d1e669cSPeter Grehan 		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
679366f6083SPeter Grehan 		bzero(&pio, sizeof(struct iodev_pio_req));
680366f6083SPeter Grehan 		pio.access = IODEV_PIO_WRITE;
681366f6083SPeter Grehan 		pio.port = sc->psc_bar[baridx].addr + offset;
682366f6083SPeter Grehan 		pio.width = size;
683366f6083SPeter Grehan 		pio.val = value;
684366f6083SPeter Grehan 
685366f6083SPeter Grehan 		(void)ioctl(iofd, IODEV_PIO, &pio);
686366f6083SPeter Grehan 	}
687*4d1e669cSPeter Grehan }
688366f6083SPeter Grehan 
689*4d1e669cSPeter Grehan static uint64_t
690*4d1e669cSPeter Grehan passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
691*4d1e669cSPeter Grehan 	      uint64_t offset, int size)
692366f6083SPeter Grehan {
693366f6083SPeter Grehan 	struct passthru_softc *sc;
694366f6083SPeter Grehan 	struct iodev_pio_req pio;
695*4d1e669cSPeter Grehan 	uint64_t val;
696366f6083SPeter Grehan 
697366f6083SPeter Grehan 	sc = pi->pi_arg;
698366f6083SPeter Grehan 
699*4d1e669cSPeter Grehan 	if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) {
700*4d1e669cSPeter Grehan 		val = msix_table_read(sc, offset, size);
701*4d1e669cSPeter Grehan 	} else {
702*4d1e669cSPeter Grehan 		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
703366f6083SPeter Grehan 		bzero(&pio, sizeof(struct iodev_pio_req));
704366f6083SPeter Grehan 		pio.access = IODEV_PIO_READ;
705366f6083SPeter Grehan 		pio.port = sc->psc_bar[baridx].addr + offset;
706366f6083SPeter Grehan 		pio.width = size;
707366f6083SPeter Grehan 		pio.val = 0;
708366f6083SPeter Grehan 
709366f6083SPeter Grehan 		(void)ioctl(iofd, IODEV_PIO, &pio);
710366f6083SPeter Grehan 
711*4d1e669cSPeter Grehan 		val = pio.val;
712*4d1e669cSPeter Grehan 	}
713*4d1e669cSPeter Grehan 
714*4d1e669cSPeter Grehan 	return (val);
715366f6083SPeter Grehan }
716366f6083SPeter Grehan 
717366f6083SPeter Grehan struct pci_devemu passthru = {
718366f6083SPeter Grehan 	.pe_emu		= "passthru",
719366f6083SPeter Grehan 	.pe_init	= passthru_init,
720366f6083SPeter Grehan 	.pe_cfgwrite	= passthru_cfgwrite,
721366f6083SPeter Grehan 	.pe_cfgread	= passthru_cfgread,
722*4d1e669cSPeter Grehan 	.pe_barwrite 	= passthru_write,
723*4d1e669cSPeter Grehan 	.pe_barread    	= passthru_read,
724366f6083SPeter Grehan };
725366f6083SPeter Grehan PCI_EMUL_SET(passthru);
726