xref: /freebsd/sys/dev/pci/pci.c (revision 8655c70597b0e0918c82114b1186df5669b83eb6)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static pci_addr_t	pci_mapbase(uint64_t mapreg);
75 static const char	*pci_maptype(uint64_t mapreg);
76 static int		pci_mapsize(uint64_t testval);
77 static int		pci_maprange(uint64_t mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 
/*
 * Method table for the pci driver.  Each DEVMETHOD() entry pairs an
 * interface method name with the function that implements it; the
 * entries are grouped below into device, bus and PCI interface sections.
 * Bus methods that need no PCI-specific handling point at the
 * bus_generic_* implementations.
 */
115 static device_method_t pci_methods[] = {
116 	/* Device interface */
117 	DEVMETHOD(device_probe,		pci_probe),
118 	DEVMETHOD(device_attach,	pci_attach),
119 	DEVMETHOD(device_detach,	bus_generic_detach),
120 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
121 	DEVMETHOD(device_suspend,	pci_suspend),
122 	DEVMETHOD(device_resume,	pci_resume),
123 
124 	/* Bus interface */
125 	DEVMETHOD(bus_print_child,	pci_print_child),
126 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
127 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
128 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
129 	DEVMETHOD(bus_driver_added,	pci_driver_added),
130 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
131 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
132 
	/* Resource management (still part of the bus interface) */
133 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
134 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
135 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
136 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
137 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
138 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
139 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
140 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
141 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
142 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
143 
144 	/* PCI interface */
145 	DEVMETHOD(pci_read_config,	pci_read_config_method),
146 	DEVMETHOD(pci_write_config,	pci_write_config_method),
147 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
148 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
149 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
150 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
151 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
152 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
153 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
154 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
155 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
156 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
157 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
158 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
159 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
160 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
161 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
162 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
163 
	/* All-zero final entry (presumably the table terminator). */
164 	{ 0, 0 }
165 };
166 
167 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
168 
169 static devclass_t pci_devclass;
170 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
171 MODULE_VERSION(pci, 1);
172 
173 static char	*pci_vendordata;
174 static size_t	pci_vendordata_size;
175 
176 
/*
 * One entry of the pci_quirks table: identifies a chip by its combined
 * vendor/device ID and names the workaround (type) that applies to it,
 * with two type-specific arguments.
 */
177 struct pci_quirk {
178 	uint32_t devid;	/* Vendor/device of the card */
179 	int	type;	/* one of the PCI_QUIRK_* values below */
180 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
181 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
182 	int	arg1;	/* PCI_QUIRK_MAP_REG entries in pci_quirks store 0x90 (the register offset) here */
183 	int	arg2;	/* zero in every pci_quirks entry in this file */
184 };
185 
/*
 * Table of known-quirky chips.  The devid values appear to carry the
 * device ID in the upper 16 bits and the vendor ID in the lower 16 bits
 * (0x8086, 0x1166 and 0x1022 line up with the Intel, ServerWorks and AMD
 * parts named in the comments below).  The list ends with an all-zero
 * entry.
 */
186 struct pci_quirk pci_quirks[] = {
187 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
188 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
189 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
190 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
191 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
192 
193 	/*
194 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
195 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
196 	 */
197 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
198 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
199 
200 	/*
201 	 * MSI doesn't work on earlier Intel chipsets including
202 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
203 	 */
204 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
205 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
206 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
207 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
208 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
209 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
210 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
211 
212 	/*
213 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
214 	 * bridge.
215 	 */
216 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
217 
	/* End of table: devid 0, remaining fields implicitly zero. */
218 	{ 0 }
219 };
220 
221 /* map register information */
222 #define	PCI_MAPMEM	0x01	/* memory map */
223 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
224 #define	PCI_MAPPORT	0x04	/* port map */
225 
/*
 * Global bookkeeping for discovered functions.  pci_read_device() appends
 * every function it finds to pci_devq and bumps both pci_numdevs and
 * pci_generation when it does so.  pcie_chipset and pcix_chipset are set
 * to 1 by pci_read_extcap() when it sees a PCI-express capability, or a
 * PCI-X capability on a header-type-1 (bridge) function, respectively.
 */
226 struct devlist pci_devq;
227 uint32_t pci_generation;
228 uint32_t pci_numdevs = 0;
229 static int pcie_chipset, pcix_chipset;
230 
231 /* sysctl vars */
/*
 * hw.pci sysctl node and its knobs.  Each knob below is an int variable
 * with a compiled-in default, a TUNABLE_INT() binding it to a "hw.pci.*"
 * name, and a SYSCTL_INT() exporting it under _hw_pci with the flags
 * shown (CTLFLAG_RW or CTLFLAG_RD).
 */
232 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
233 
/* hw.pci.enable_io_modes: default 1. */
234 static int pci_enable_io_modes = 1;
235 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
236 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
237     &pci_enable_io_modes, 1,
238     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
239 enable these bits correctly.  We'd like to do this all the time, but there\n\
240 are some peripherals that this causes problems with.");
241 
/* hw.pci.do_power_nodriver: default 0 (disabled); levels 1-3 per the text. */
242 static int pci_do_power_nodriver = 0;
243 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
244 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
245     &pci_do_power_nodriver, 0,
246   "Place a function into D3 state when no driver attaches to it.  0 means\n\
247 disable.  1 means conservatively place devices into D3 state.  2 means\n\
248 agressively place devices into D3 state.  3 means put absolutely everything\n\
249 in D3 state.");
250 
/* hw.pci.do_power_resume: default 1. */
251 static int pci_do_power_resume = 1;
252 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
253 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
254     &pci_do_power_resume, 1,
255   "Transition from D3 -> D0 on resume.");
256 
/* hw.pci.enable_msi: default 1. */
257 static int pci_do_msi = 1;
258 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
259 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
260     "Enable support for MSI interrupts");
261 
/* hw.pci.enable_msix: default 1. */
262 static int pci_do_msix = 1;
263 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
264 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
265     "Enable support for MSI-X interrupts");
266 
/* hw.pci.honor_msi_blacklist: default 1; exported with CTLFLAG_RD. */
267 static int pci_honor_msi_blacklist = 1;
268 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
269 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
270     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271 
272 /* Find a device_t by bus/slot/function in domain 0 */
273 
274 device_t
275 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276 {
277 
278 	return (pci_find_dbsf(0, bus, slot, func));
279 }
280 
281 /* Find a device_t by domain/bus/slot/function */
282 
283 device_t
284 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285 {
286 	struct pci_devinfo *dinfo;
287 
288 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289 		if ((dinfo->cfg.domain == domain) &&
290 		    (dinfo->cfg.bus == bus) &&
291 		    (dinfo->cfg.slot == slot) &&
292 		    (dinfo->cfg.func == func)) {
293 			return (dinfo->cfg.dev);
294 		}
295 	}
296 
297 	return (NULL);
298 }
299 
300 /* Find a device_t by vendor/device ID */
301 
302 device_t
303 pci_find_device(uint16_t vendor, uint16_t device)
304 {
305 	struct pci_devinfo *dinfo;
306 
307 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308 		if ((dinfo->cfg.vendor == vendor) &&
309 		    (dinfo->cfg.device == device)) {
310 			return (dinfo->cfg.dev);
311 		}
312 	}
313 
314 	return (NULL);
315 }
316 
317 /* return base address of memory or port map */
318 
319 static pci_addr_t
320 pci_mapbase(uint64_t mapreg)
321 {
322 
323 	if (PCI_BAR_MEM(mapreg))
324 		return (mapreg & PCIM_BAR_MEM_BASE);
325 	else
326 		return (mapreg & PCIM_BAR_IO_BASE);
327 }
328 
329 /* return map type of memory or port map */
330 
331 static const char *
332 pci_maptype(uint64_t mapreg)
333 {
334 
335 	if (PCI_BAR_IO(mapreg))
336 		return ("I/O Port");
337 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338 		return ("Prefetchable Memory");
339 	return ("Memory");
340 }
341 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * The result is the number of zero bits below the lowest set bit of
 * the base-address field extracted by pci_mapbase(); a field of zero
 * yields 0.
 */
static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int shift;

	base = pci_mapbase(testval);
	if (base == 0)
		return (0);

	/* Count how far the lowest set bit sits above bit 0. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;
	return (shift);
}
360 
361 /* return log2 of address range supported by map register */
362 
363 static int
364 pci_maprange(uint64_t mapreg)
365 {
366 	int ln2range = 0;
367 
368 	if (PCI_BAR_IO(mapreg))
369 		ln2range = 32;
370 	else
371 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372 		case PCIM_BAR_MEM_32:
373 			ln2range = 32;
374 			break;
375 		case PCIM_BAR_MEM_1MB:
376 			ln2range = 20;
377 			break;
378 		case PCIM_BAR_MEM_64:
379 			ln2range = 64;
380 			break;
381 		}
382 	return (ln2range);
383 }
384 
385 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386 
387 static void
388 pci_fixancient(pcicfgregs *cfg)
389 {
390 	if (cfg->hdrtype != 0)
391 		return;
392 
393 	/* PCI to PCI bridges use header type 1 */
394 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395 		cfg->hdrtype = 1;
396 }
397 
398 /* extract header type specific config data */
399 
400 static void
401 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
402 {
403 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
404 	switch (cfg->hdrtype) {
405 	case 0:
406 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
407 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
408 		cfg->nummaps	    = PCI_MAXMAPS_0;
409 		break;
410 	case 1:
411 		cfg->nummaps	    = PCI_MAXMAPS_1;
412 		break;
413 	case 2:
414 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
415 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
416 		cfg->nummaps	    = PCI_MAXMAPS_2;
417 		break;
418 	}
419 #undef REG
420 }
421 
422 /* read configuration header into pcicfgregs structure */
423 struct pci_devinfo *
424 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
425 {
426 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
427 	pcicfgregs *cfg = NULL;
428 	struct pci_devinfo *devlist_entry;
429 	struct devlist *devlist_head;
430 
431 	devlist_head = &pci_devq;
432 
433 	devlist_entry = NULL;
434 
435 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
436 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
437 		if (devlist_entry == NULL)
438 			return (NULL);
439 
440 		cfg = &devlist_entry->cfg;
441 
442 		cfg->domain		= d;
443 		cfg->bus		= b;
444 		cfg->slot		= s;
445 		cfg->func		= f;
446 		cfg->vendor		= REG(PCIR_VENDOR, 2);
447 		cfg->device		= REG(PCIR_DEVICE, 2);
448 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
449 		cfg->statreg		= REG(PCIR_STATUS, 2);
450 		cfg->baseclass		= REG(PCIR_CLASS, 1);
451 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
452 		cfg->progif		= REG(PCIR_PROGIF, 1);
453 		cfg->revid		= REG(PCIR_REVID, 1);
454 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
455 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
456 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
457 		cfg->intpin		= REG(PCIR_INTPIN, 1);
458 		cfg->intline		= REG(PCIR_INTLINE, 1);
459 
460 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
461 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
462 
463 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
464 		cfg->hdrtype		&= ~PCIM_MFDEV;
465 
466 		pci_fixancient(cfg);
467 		pci_hdrtypedata(pcib, b, s, f, cfg);
468 
469 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
470 			pci_read_extcap(pcib, cfg);
471 
472 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
473 
474 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
475 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
476 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
477 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
478 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
479 
480 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
481 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
482 		devlist_entry->conf.pc_vendor = cfg->vendor;
483 		devlist_entry->conf.pc_device = cfg->device;
484 
485 		devlist_entry->conf.pc_class = cfg->baseclass;
486 		devlist_entry->conf.pc_subclass = cfg->subclass;
487 		devlist_entry->conf.pc_progif = cfg->progif;
488 		devlist_entry->conf.pc_revid = cfg->revid;
489 
490 		pci_numdevs++;
491 		pci_generation++;
492 	}
493 	return (devlist_entry);
494 #undef REG
495 }
496 
497 static void
498 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
499 {
500 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
501 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
502 #if defined(__i386__) || defined(__amd64__)
503 	uint64_t addr;
504 #endif
505 	uint32_t val;
506 	int	ptr, nextptr, ptrptr;
507 
508 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
509 	case 0:
510 	case 1:
511 		ptrptr = PCIR_CAP_PTR;
512 		break;
513 	case 2:
514 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
515 		break;
516 	default:
517 		return;		/* no extended capabilities support */
518 	}
519 	nextptr = REG(ptrptr, 1);	/* sanity check? */
520 
521 	/*
522 	 * Read capability entries.
523 	 */
524 	while (nextptr != 0) {
525 		/* Sanity check */
526 		if (nextptr > 255) {
527 			printf("illegal PCI extended capability offset %d\n",
528 			    nextptr);
529 			return;
530 		}
531 		/* Find the next entry */
532 		ptr = nextptr;
533 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
534 
535 		/* Process this entry */
536 		switch (REG(ptr + PCICAP_ID, 1)) {
537 		case PCIY_PMG:		/* PCI power management */
538 			if (cfg->pp.pp_cap == 0) {
539 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
540 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
541 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
542 				if ((nextptr - ptr) > PCIR_POWER_DATA)
543 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
544 			}
545 			break;
546 #if defined(__i386__) || defined(__amd64__)
547 		case PCIY_HT:		/* HyperTransport */
548 			/* Determine HT-specific capability type. */
549 			val = REG(ptr + PCIR_HT_COMMAND, 2);
550 			switch (val & PCIM_HTCMD_CAP_MASK) {
551 			case PCIM_HTCAP_MSI_MAPPING:
552 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
553 					/* Sanity check the mapping window. */
554 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
555 					    4);
556 					addr <<= 32;
557 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
558 					    4);
559 					if (addr != MSI_INTEL_ADDR_BASE)
560 						device_printf(pcib,
561 	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
562 						    cfg->domain, cfg->bus,
563 						    cfg->slot, cfg->func,
564 						    (long long)addr);
565 				} else
566 					addr = MSI_INTEL_ADDR_BASE;
567 
568 				cfg->ht.ht_msimap = ptr;
569 				cfg->ht.ht_msictrl = val;
570 				cfg->ht.ht_msiaddr = addr;
571 				break;
572 			}
573 			break;
574 #endif
575 		case PCIY_MSI:		/* PCI MSI */
576 			cfg->msi.msi_location = ptr;
577 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
578 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
579 						     PCIM_MSICTRL_MMC_MASK)>>1);
580 			break;
581 		case PCIY_MSIX:		/* PCI MSI-X */
582 			cfg->msix.msix_location = ptr;
583 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
584 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
585 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
586 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
587 			cfg->msix.msix_table_bar = PCIR_BAR(val &
588 			    PCIM_MSIX_BIR_MASK);
589 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
590 			val = REG(ptr + PCIR_MSIX_PBA, 4);
591 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
592 			    PCIM_MSIX_BIR_MASK);
593 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
594 			break;
595 		case PCIY_VPD:		/* PCI Vital Product Data */
596 			cfg->vpd.vpd_reg = ptr;
597 			break;
598 		case PCIY_SUBVENDOR:
599 			/* Should always be true. */
600 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
601 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
602 				cfg->subvendor = val & 0xffff;
603 				cfg->subdevice = val >> 16;
604 			}
605 			break;
606 		case PCIY_PCIX:		/* PCI-X */
607 			/*
608 			 * Assume we have a PCI-X chipset if we have
609 			 * at least one PCI-PCI bridge with a PCI-X
610 			 * capability.  Note that some systems with
611 			 * PCI-express or HT chipsets might match on
612 			 * this check as well.
613 			 */
614 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
615 				pcix_chipset = 1;
616 			break;
617 		case PCIY_EXPRESS:	/* PCI-express */
618 			/*
619 			 * Assume we have a PCI-express chipset if we have
620 			 * at least one PCI-express device.
621 			 */
622 			pcie_chipset = 1;
623 			break;
624 		default:
625 			break;
626 		}
627 	}
628 /* REG and WREG use carry through to next functions */
629 }
630 
631 /*
632  * PCI Vital Product Data
633  */
634 
635 #define	PCI_VPD_TIMEOUT		1000000
636 
637 static int
638 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
639 {
640 	int count = PCI_VPD_TIMEOUT;
641 
642 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
643 
644 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
645 
646 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
647 		if (--count < 0)
648 			return (ENXIO);
649 		DELAY(1);	/* limit looping */
650 	}
651 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
652 
653 	return (0);
654 }
655 
656 #if 0
/*
 * Write one 32-bit word of Vital Product Data (compiled out by the
 * surrounding "#if 0").
 *
 * Stores 'data' in the capability's data register, then writes the
 * 4-byte aligned VPD address 'reg' with bit 15 (0x8000) set to the
 * address register and polls until that bit reads back clear.  Returns 0
 * on success or ENXIO when the PCI_VPD_TIMEOUT retry budget runs out.
 * Uses the REG()/WREG() macros left defined by pci_read_extcap().
 */
657 static int
658 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
659 {
660 	int count = PCI_VPD_TIMEOUT;
661 
662 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
663 
664 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
665 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Wait for bit 15 of the address register to drop. */
666 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
667 		if (--count < 0)
668 			return (ENXIO);
669 		DELAY(1);	/* limit looping */
670 	}
671 
672 	return (0);
673 }
674 #endif
675 
676 #undef PCI_VPD_TIMEOUT
677 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while reading the device 32 bits at a time.
 */
678 struct vpd_readstate {
679 	device_t	pcib;		/* bridge passed to pci_read_vpd_reg() */
680 	pcicfgregs	*cfg;		/* function whose VPD is being read */
681 	uint32_t	val;		/* current word; shifted right 8 bits per byte consumed */
682 	int		bytesinval;	/* bytes of val not yet handed out */
683 	int		off;		/* VPD address of the next word to fetch (steps by 4) */
684 	uint8_t		cksum;		/* running 8-bit sum of every byte handed out */
685 };
686 
687 static int
688 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
689 {
690 	uint32_t reg;
691 	uint8_t byte;
692 
693 	if (vrs->bytesinval == 0) {
694 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
695 			return (ENXIO);
696 		vrs->val = le32toh(reg);
697 		vrs->off += 4;
698 		byte = vrs->val & 0xff;
699 		vrs->bytesinval = 3;
700 	} else {
701 		vrs->val = vrs->val >> 8;
702 		byte = vrs->val & 0xff;
703 		vrs->bytesinval--;
704 	}
705 
706 	vrs->cksum += byte;
707 	*data = byte;
708 	return (0);
709 }
710 
711 static void
712 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
713 {
714 	struct vpd_readstate vrs;
715 	int state;
716 	int name;
717 	int remain;
718 	int i;
719 	int alloc, off;		/* alloc/off for RO/W arrays */
720 	int cksumvalid;
721 	int dflen;
722 	uint8_t byte;
723 	uint8_t byte2;
724 
725 	/* init vpd reader */
726 	vrs.bytesinval = 0;
727 	vrs.off = 0;
728 	vrs.pcib = pcib;
729 	vrs.cfg = cfg;
730 	vrs.cksum = 0;
731 
732 	state = 0;
733 	name = remain = i = 0;	/* shut up stupid gcc */
734 	alloc = off = 0;	/* shut up stupid gcc */
735 	dflen = 0;		/* shut up stupid gcc */
736 	cksumvalid = -1;
737 	while (state >= 0) {
738 		if (vpd_nextbyte(&vrs, &byte)) {
739 			state = -2;
740 			break;
741 		}
742 #if 0
743 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
744 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
745 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
746 #endif
747 		switch (state) {
748 		case 0:		/* item name */
749 			if (byte & 0x80) {
750 				if (vpd_nextbyte(&vrs, &byte2)) {
751 					state = -2;
752 					break;
753 				}
754 				remain = byte2;
755 				if (vpd_nextbyte(&vrs, &byte2)) {
756 					state = -2;
757 					break;
758 				}
759 				remain |= byte2 << 8;
760 				if (remain > (0x7f*4 - vrs.off)) {
761 					state = -1;
762 					printf(
763 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
764 					    cfg->domain, cfg->bus, cfg->slot,
765 					    cfg->func, remain);
766 				}
767 				name = byte & 0x7f;
768 			} else {
769 				remain = byte & 0x7;
770 				name = (byte >> 3) & 0xf;
771 			}
772 			switch (name) {
773 			case 0x2:	/* String */
774 				cfg->vpd.vpd_ident = malloc(remain + 1,
775 				    M_DEVBUF, M_WAITOK);
776 				i = 0;
777 				state = 1;
778 				break;
779 			case 0xf:	/* End */
780 				state = -1;
781 				break;
782 			case 0x10:	/* VPD-R */
783 				alloc = 8;
784 				off = 0;
785 				cfg->vpd.vpd_ros = malloc(alloc *
786 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
787 				    M_WAITOK | M_ZERO);
788 				state = 2;
789 				break;
790 			case 0x11:	/* VPD-W */
791 				alloc = 8;
792 				off = 0;
793 				cfg->vpd.vpd_w = malloc(alloc *
794 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
795 				    M_WAITOK | M_ZERO);
796 				state = 5;
797 				break;
798 			default:	/* Invalid data, abort */
799 				state = -1;
800 				break;
801 			}
802 			break;
803 
804 		case 1:	/* Identifier String */
805 			cfg->vpd.vpd_ident[i++] = byte;
806 			remain--;
807 			if (remain == 0)  {
808 				cfg->vpd.vpd_ident[i] = '\0';
809 				state = 0;
810 			}
811 			break;
812 
813 		case 2:	/* VPD-R Keyword Header */
814 			if (off == alloc) {
815 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
816 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
817 				    M_DEVBUF, M_WAITOK | M_ZERO);
818 			}
819 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
820 			if (vpd_nextbyte(&vrs, &byte2)) {
821 				state = -2;
822 				break;
823 			}
824 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
825 			if (vpd_nextbyte(&vrs, &byte2)) {
826 				state = -2;
827 				break;
828 			}
829 			dflen = byte2;
830 			if (dflen == 0 &&
831 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
832 			    2) == 0) {
833 				/*
834 				 * if this happens, we can't trust the rest
835 				 * of the VPD.
836 				 */
837 				printf(
838 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
839 				    cfg->domain, cfg->bus, cfg->slot,
840 				    cfg->func, dflen);
841 				cksumvalid = 0;
842 				state = -1;
843 				break;
844 			} else if (dflen == 0) {
845 				cfg->vpd.vpd_ros[off].value = malloc(1 *
846 				    sizeof(*cfg->vpd.vpd_ros[off].value),
847 				    M_DEVBUF, M_WAITOK);
848 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
849 			} else
850 				cfg->vpd.vpd_ros[off].value = malloc(
851 				    (dflen + 1) *
852 				    sizeof(*cfg->vpd.vpd_ros[off].value),
853 				    M_DEVBUF, M_WAITOK);
854 			remain -= 3;
855 			i = 0;
856 			/* keep in sync w/ state 3's transistions */
857 			if (dflen == 0 && remain == 0)
858 				state = 0;
859 			else if (dflen == 0)
860 				state = 2;
861 			else
862 				state = 3;
863 			break;
864 
865 		case 3:	/* VPD-R Keyword Value */
866 			cfg->vpd.vpd_ros[off].value[i++] = byte;
867 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
868 			    "RV", 2) == 0 && cksumvalid == -1) {
869 				if (vrs.cksum == 0)
870 					cksumvalid = 1;
871 				else {
872 					if (bootverbose)
873 						printf(
874 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
875 						    cfg->domain, cfg->bus,
876 						    cfg->slot, cfg->func,
877 						    vrs.cksum);
878 					cksumvalid = 0;
879 					state = -1;
880 					break;
881 				}
882 			}
883 			dflen--;
884 			remain--;
885 			/* keep in sync w/ state 2's transistions */
886 			if (dflen == 0)
887 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
888 			if (dflen == 0 && remain == 0) {
889 				cfg->vpd.vpd_rocnt = off;
890 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
891 				    off * sizeof(*cfg->vpd.vpd_ros),
892 				    M_DEVBUF, M_WAITOK | M_ZERO);
893 				state = 0;
894 			} else if (dflen == 0)
895 				state = 2;
896 			break;
897 
898 		case 4:
899 			remain--;
900 			if (remain == 0)
901 				state = 0;
902 			break;
903 
904 		case 5:	/* VPD-W Keyword Header */
905 			if (off == alloc) {
906 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
907 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
908 				    M_DEVBUF, M_WAITOK | M_ZERO);
909 			}
910 			cfg->vpd.vpd_w[off].keyword[0] = byte;
911 			if (vpd_nextbyte(&vrs, &byte2)) {
912 				state = -2;
913 				break;
914 			}
915 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
916 			if (vpd_nextbyte(&vrs, &byte2)) {
917 				state = -2;
918 				break;
919 			}
920 			cfg->vpd.vpd_w[off].len = dflen = byte2;
921 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
922 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
923 			    sizeof(*cfg->vpd.vpd_w[off].value),
924 			    M_DEVBUF, M_WAITOK);
925 			remain -= 3;
926 			i = 0;
927 			/* keep in sync w/ state 6's transistions */
928 			if (dflen == 0 && remain == 0)
929 				state = 0;
930 			else if (dflen == 0)
931 				state = 5;
932 			else
933 				state = 6;
934 			break;
935 
936 		case 6:	/* VPD-W Keyword Value */
937 			cfg->vpd.vpd_w[off].value[i++] = byte;
938 			dflen--;
939 			remain--;
940 			/* keep in sync w/ state 5's transistions */
941 			if (dflen == 0)
942 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
943 			if (dflen == 0 && remain == 0) {
944 				cfg->vpd.vpd_wcnt = off;
945 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
946 				    off * sizeof(*cfg->vpd.vpd_w),
947 				    M_DEVBUF, M_WAITOK | M_ZERO);
948 				state = 0;
949 			} else if (dflen == 0)
950 				state = 5;
951 			break;
952 
953 		default:
954 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
955 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
956 			    state);
957 			state = -1;
958 			break;
959 		}
960 	}
961 
962 	if (cksumvalid == 0 || state < -1) {
963 		/* read-only data bad, clean up */
964 		if (cfg->vpd.vpd_ros != NULL) {
965 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
966 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
967 			free(cfg->vpd.vpd_ros, M_DEVBUF);
968 			cfg->vpd.vpd_ros = NULL;
969 		}
970 	}
971 	if (state < -1) {
972 		/* I/O error, clean up */
973 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
974 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
975 		if (cfg->vpd.vpd_ident != NULL) {
976 			free(cfg->vpd.vpd_ident, M_DEVBUF);
977 			cfg->vpd.vpd_ident = NULL;
978 		}
979 		if (cfg->vpd.vpd_w != NULL) {
980 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
981 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
982 			free(cfg->vpd.vpd_w, M_DEVBUF);
983 			cfg->vpd.vpd_w = NULL;
984 		}
985 	}
986 	cfg->vpd.vpd_cached = 1;
987 #undef REG
988 #undef WREG
989 }
990 
991 int
992 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
993 {
994 	struct pci_devinfo *dinfo = device_get_ivars(child);
995 	pcicfgregs *cfg = &dinfo->cfg;
996 
997 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
998 		pci_read_vpd(device_get_parent(dev), cfg);
999 
1000 	*identptr = cfg->vpd.vpd_ident;
1001 
1002 	if (*identptr == NULL)
1003 		return (ENXIO);
1004 
1005 	return (0);
1006 }
1007 
1008 int
1009 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1010 	const char **vptr)
1011 {
1012 	struct pci_devinfo *dinfo = device_get_ivars(child);
1013 	pcicfgregs *cfg = &dinfo->cfg;
1014 	int i;
1015 
1016 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1017 		pci_read_vpd(device_get_parent(dev), cfg);
1018 
1019 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1020 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1021 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1022 			*vptr = cfg->vpd.vpd_ros[i].value;
1023 		}
1024 
1025 	if (i != cfg->vpd.vpd_rocnt)
1026 		return (0);
1027 
1028 	*vptr = NULL;
1029 	return (ENXIO);
1030 }
1031 
1032 /*
1033  * Return the offset in configuration space of the requested extended
1034  * capability entry or 0 if the specified capability was not found.
1035  */
1036 int
1037 pci_find_extcap_method(device_t dev, device_t child, int capability,
1038     int *capreg)
1039 {
1040 	struct pci_devinfo *dinfo = device_get_ivars(child);
1041 	pcicfgregs *cfg = &dinfo->cfg;
1042 	u_int32_t status;
1043 	u_int8_t ptr;
1044 
1045 	/*
1046 	 * Check the CAP_LIST bit of the PCI status register first.
1047 	 */
1048 	status = pci_read_config(child, PCIR_STATUS, 2);
1049 	if (!(status & PCIM_STATUS_CAPPRESENT))
1050 		return (ENXIO);
1051 
1052 	/*
1053 	 * Determine the start pointer of the capabilities list.
1054 	 */
1055 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1056 	case 0:
1057 	case 1:
1058 		ptr = PCIR_CAP_PTR;
1059 		break;
1060 	case 2:
1061 		ptr = PCIR_CAP_PTR_2;
1062 		break;
1063 	default:
1064 		/* XXX: panic? */
1065 		return (ENXIO);		/* no extended capabilities support */
1066 	}
1067 	ptr = pci_read_config(child, ptr, 1);
1068 
1069 	/*
1070 	 * Traverse the capabilities list.
1071 	 */
1072 	while (ptr != 0) {
1073 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1074 			if (capreg != NULL)
1075 				*capreg = ptr;
1076 			return (0);
1077 		}
1078 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1079 	}
1080 
1081 	return (ENOENT);
1082 }
1083 
1084 /*
1085  * Support for MSI-X message interrupts.
1086  */
1087 void
1088 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1089 {
1090 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1091 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1092 	uint32_t offset;
1093 
1094 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1095 	offset = msix->msix_table_offset + index * 16;
1096 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1097 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1098 	bus_write_4(msix->msix_table_res, offset + 8, data);
1099 
1100 	/* Enable MSI -> HT mapping. */
1101 	pci_ht_map_msi(dev, address);
1102 }
1103 
1104 void
1105 pci_mask_msix(device_t dev, u_int index)
1106 {
1107 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1108 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1109 	uint32_t offset, val;
1110 
1111 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1112 	offset = msix->msix_table_offset + index * 16 + 12;
1113 	val = bus_read_4(msix->msix_table_res, offset);
1114 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1115 		val |= PCIM_MSIX_VCTRL_MASK;
1116 		bus_write_4(msix->msix_table_res, offset, val);
1117 	}
1118 }
1119 
1120 void
1121 pci_unmask_msix(device_t dev, u_int index)
1122 {
1123 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125 	uint32_t offset, val;
1126 
1127 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1128 	offset = msix->msix_table_offset + index * 16 + 12;
1129 	val = bus_read_4(msix->msix_table_res, offset);
1130 	if (val & PCIM_MSIX_VCTRL_MASK) {
1131 		val &= ~PCIM_MSIX_VCTRL_MASK;
1132 		bus_write_4(msix->msix_table_res, offset, val);
1133 	}
1134 }
1135 
1136 int
1137 pci_pending_msix(device_t dev, u_int index)
1138 {
1139 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141 	uint32_t offset, bit;
1142 
1143 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144 	offset = msix->msix_pba_offset + (index / 32) * 4;
1145 	bit = 1 << index % 32;
1146 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1147 }
1148 
1149 /*
1150  * Restore MSI-X registers and table during resume.  If MSI-X is
1151  * enabled then walk the virtual table to restore the actual MSI-X
1152  * table.
1153  */
1154 static void
1155 pci_resume_msix(device_t dev)
1156 {
1157 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1158 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1159 	struct msix_table_entry *mte;
1160 	struct msix_vector *mv;
1161 	int i;
1162 
1163 	if (msix->msix_alloc > 0) {
1164 		/* First, mask all vectors. */
1165 		for (i = 0; i < msix->msix_msgnum; i++)
1166 			pci_mask_msix(dev, i);
1167 
1168 		/* Second, program any messages with at least one handler. */
1169 		for (i = 0; i < msix->msix_table_len; i++) {
1170 			mte = &msix->msix_table[i];
1171 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1172 				continue;
1173 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1174 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1175 			pci_unmask_msix(dev, i);
1176 		}
1177 	}
1178 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1179 	    msix->msix_ctrl, 2);
1180 }
1181 
1182 /*
1183  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1184  * returned in *count.  After this function returns, each message will be
1185  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1186  */
1187 int
1188 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1189 {
1190 	struct pci_devinfo *dinfo = device_get_ivars(child);
1191 	pcicfgregs *cfg = &dinfo->cfg;
1192 	struct resource_list_entry *rle;
1193 	int actual, error, i, irq, max;
1194 
1195 	/* Don't let count == 0 get us into trouble. */
1196 	if (*count == 0)
1197 		return (EINVAL);
1198 
1199 	/* If rid 0 is allocated, then fail. */
1200 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1201 	if (rle != NULL && rle->res != NULL)
1202 		return (ENXIO);
1203 
1204 	/* Already have allocated messages? */
1205 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1206 		return (ENXIO);
1207 
1208 	/* If MSI is blacklisted for this system, fail. */
1209 	if (pci_msi_blacklisted())
1210 		return (ENXIO);
1211 
1212 	/* MSI-X capability present? */
1213 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1214 		return (ENODEV);
1215 
1216 	/* Make sure the appropriate BARs are mapped. */
1217 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1218 	    cfg->msix.msix_table_bar);
1219 	if (rle == NULL || rle->res == NULL ||
1220 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1221 		return (ENXIO);
1222 	cfg->msix.msix_table_res = rle->res;
1223 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1224 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1225 		    cfg->msix.msix_pba_bar);
1226 		if (rle == NULL || rle->res == NULL ||
1227 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1228 			return (ENXIO);
1229 	}
1230 	cfg->msix.msix_pba_res = rle->res;
1231 
1232 	if (bootverbose)
1233 		device_printf(child,
1234 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1235 		    *count, cfg->msix.msix_msgnum);
1236 	max = min(*count, cfg->msix.msix_msgnum);
1237 	for (i = 0; i < max; i++) {
1238 		/* Allocate a message. */
1239 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1240 		if (error)
1241 			break;
1242 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1243 		    irq, 1);
1244 	}
1245 	actual = i;
1246 
1247 	if (bootverbose) {
1248 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1249 		if (actual == 1)
1250 			device_printf(child, "using IRQ %lu for MSI-X\n",
1251 			    rle->start);
1252 		else {
1253 			int run;
1254 
1255 			/*
1256 			 * Be fancy and try to print contiguous runs of
1257 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1258 			 * 'run' is true if we are in a range.
1259 			 */
1260 			device_printf(child, "using IRQs %lu", rle->start);
1261 			irq = rle->start;
1262 			run = 0;
1263 			for (i = 1; i < actual; i++) {
1264 				rle = resource_list_find(&dinfo->resources,
1265 				    SYS_RES_IRQ, i + 1);
1266 
1267 				/* Still in a run? */
1268 				if (rle->start == irq + 1) {
1269 					run = 1;
1270 					irq++;
1271 					continue;
1272 				}
1273 
1274 				/* Finish previous range. */
1275 				if (run) {
1276 					printf("-%d", irq);
1277 					run = 0;
1278 				}
1279 
1280 				/* Start new range. */
1281 				printf(",%lu", rle->start);
1282 				irq = rle->start;
1283 			}
1284 
1285 			/* Unfinished range? */
1286 			if (run)
1287 				printf("-%d", irq);
1288 			printf(" for MSI-X\n");
1289 		}
1290 	}
1291 
1292 	/* Mask all vectors. */
1293 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1294 		pci_mask_msix(child, i);
1295 
1296 	/* Allocate and initialize vector data and virtual table. */
1297 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1298 	    M_DEVBUF, M_WAITOK | M_ZERO);
1299 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1300 	    M_DEVBUF, M_WAITOK | M_ZERO);
1301 	for (i = 0; i < actual; i++) {
1302 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1303 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1304 		cfg->msix.msix_table[i].mte_vector = i + 1;
1305 	}
1306 
1307 	/* Update control register to enable MSI-X. */
1308 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1309 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1310 	    cfg->msix.msix_ctrl, 2);
1311 
1312 	/* Update counts of alloc'd messages. */
1313 	cfg->msix.msix_alloc = actual;
1314 	cfg->msix.msix_table_len = actual;
1315 	*count = actual;
1316 	return (0);
1317 }
1318 
1319 /*
1320  * By default, pci_alloc_msix() will assign the allocated IRQ
1321  * resources consecutively to the first N messages in the MSI-X table.
1322  * However, device drivers may want to use different layouts if they
1323  * either receive fewer messages than they asked for, or they wish to
1324  * populate the MSI-X table sparsely.  This method allows the driver
1325  * to specify what layout it wants.  It must be called after a
1326  * successful pci_alloc_msix() but before any of the associated
1327  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1328  *
1329  * The 'vectors' array contains 'count' message vectors.  The array
1330  * maps directly to the MSI-X table in that index 0 in the array
1331  * specifies the vector for the first message in the MSI-X table, etc.
1332  * The vector value in each array index can either be 0 to indicate
1333  * that no vector should be assigned to a message slot, or it can be a
1334  * number from 1 to N (where N is the count returned from a
1335  * succcessful call to pci_alloc_msix()) to indicate which message
1336  * vector (IRQ) to be used for the corresponding message.
1337  *
1338  * On successful return, each message with a non-zero vector will have
1339  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1340  * 1.  Additionally, if any of the IRQs allocated via the previous
1341  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1342  * will be freed back to the system automatically.
1343  *
1344  * For example, suppose a driver has a MSI-X table with 6 messages and
1345  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1346  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1347  * C.  After the call to pci_alloc_msix(), the device will be setup to
1348  * have an MSI-X table of ABC--- (where - means no vector assigned).
1349  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1350  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1351  * be freed back to the system.  This device will also have valid
1352  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1353  *
1354  * In any case, the SYS_RES_IRQ rid X will always map to the message
1355  * at MSI-X table index X - 1 and will only be valid if a vector is
1356  * assigned to that table entry.
1357  */
1358 int
1359 pci_remap_msix_method(device_t dev, device_t child, int count,
1360     const u_int *vectors)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(child);
1363 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1364 	struct resource_list_entry *rle;
1365 	int i, irq, j, *used;
1366 
1367 	/*
1368 	 * Have to have at least one message in the table but the
1369 	 * table can't be bigger than the actual MSI-X table in the
1370 	 * device.
1371 	 */
1372 	if (count == 0 || count > msix->msix_msgnum)
1373 		return (EINVAL);
1374 
1375 	/* Sanity check the vectors. */
1376 	for (i = 0; i < count; i++)
1377 		if (vectors[i] > msix->msix_alloc)
1378 			return (EINVAL);
1379 
1380 	/*
1381 	 * Make sure there aren't any holes in the vectors to be used.
1382 	 * It's a big pain to support it, and it doesn't really make
1383 	 * sense anyway.  Also, at least one vector must be used.
1384 	 */
1385 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1386 	    M_ZERO);
1387 	for (i = 0; i < count; i++)
1388 		if (vectors[i] != 0)
1389 			used[vectors[i] - 1] = 1;
1390 	for (i = 0; i < msix->msix_alloc - 1; i++)
1391 		if (used[i] == 0 && used[i + 1] == 1) {
1392 			free(used, M_DEVBUF);
1393 			return (EINVAL);
1394 		}
1395 	if (used[0] != 1) {
1396 		free(used, M_DEVBUF);
1397 		return (EINVAL);
1398 	}
1399 
1400 	/* Make sure none of the resources are allocated. */
1401 	for (i = 0; i < msix->msix_table_len; i++) {
1402 		if (msix->msix_table[i].mte_vector == 0)
1403 			continue;
1404 		if (msix->msix_table[i].mte_handlers > 0)
1405 			return (EBUSY);
1406 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1407 		KASSERT(rle != NULL, ("missing resource"));
1408 		if (rle->res != NULL)
1409 			return (EBUSY);
1410 	}
1411 
1412 	/* Free the existing resource list entries. */
1413 	for (i = 0; i < msix->msix_table_len; i++) {
1414 		if (msix->msix_table[i].mte_vector == 0)
1415 			continue;
1416 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1417 	}
1418 
1419 	/*
1420 	 * Build the new virtual table keeping track of which vectors are
1421 	 * used.
1422 	 */
1423 	free(msix->msix_table, M_DEVBUF);
1424 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1425 	    M_DEVBUF, M_WAITOK | M_ZERO);
1426 	for (i = 0; i < count; i++)
1427 		msix->msix_table[i].mte_vector = vectors[i];
1428 	msix->msix_table_len = count;
1429 
1430 	/* Free any unused IRQs and resize the vectors array if necessary. */
1431 	j = msix->msix_alloc - 1;
1432 	if (used[j] == 0) {
1433 		struct msix_vector *vec;
1434 
1435 		while (used[j] == 0) {
1436 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1437 			    msix->msix_vectors[j].mv_irq);
1438 			j--;
1439 		}
1440 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1441 		    M_WAITOK);
1442 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1443 		    (j + 1));
1444 		free(msix->msix_vectors, M_DEVBUF);
1445 		msix->msix_vectors = vec;
1446 		msix->msix_alloc = j + 1;
1447 	}
1448 	free(used, M_DEVBUF);
1449 
1450 	/* Map the IRQs onto the rids. */
1451 	for (i = 0; i < count; i++) {
1452 		if (vectors[i] == 0)
1453 			continue;
1454 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1455 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1456 		    irq, 1);
1457 	}
1458 
1459 	if (bootverbose) {
1460 		device_printf(child, "Remapped MSI-X IRQs as: ");
1461 		for (i = 0; i < count; i++) {
1462 			if (i != 0)
1463 				printf(", ");
1464 			if (vectors[i] == 0)
1465 				printf("---");
1466 			else
1467 				printf("%d",
1468 				    msix->msix_vectors[vectors[i]].mv_irq);
1469 		}
1470 		printf("\n");
1471 	}
1472 
1473 	return (0);
1474 }
1475 
1476 static int
1477 pci_release_msix(device_t dev, device_t child)
1478 {
1479 	struct pci_devinfo *dinfo = device_get_ivars(child);
1480 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1481 	struct resource_list_entry *rle;
1482 	int i;
1483 
1484 	/* Do we have any messages to release? */
1485 	if (msix->msix_alloc == 0)
1486 		return (ENODEV);
1487 
1488 	/* Make sure none of the resources are allocated. */
1489 	for (i = 0; i < msix->msix_table_len; i++) {
1490 		if (msix->msix_table[i].mte_vector == 0)
1491 			continue;
1492 		if (msix->msix_table[i].mte_handlers > 0)
1493 			return (EBUSY);
1494 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1495 		KASSERT(rle != NULL, ("missing resource"));
1496 		if (rle->res != NULL)
1497 			return (EBUSY);
1498 	}
1499 
1500 	/* Update control register to disable MSI-X. */
1501 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1502 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1503 	    msix->msix_ctrl, 2);
1504 
1505 	/* Free the resource list entries. */
1506 	for (i = 0; i < msix->msix_table_len; i++) {
1507 		if (msix->msix_table[i].mte_vector == 0)
1508 			continue;
1509 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1510 	}
1511 	free(msix->msix_table, M_DEVBUF);
1512 	msix->msix_table_len = 0;
1513 
1514 	/* Release the IRQs. */
1515 	for (i = 0; i < msix->msix_alloc; i++)
1516 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1517 		    msix->msix_vectors[i].mv_irq);
1518 	free(msix->msix_vectors, M_DEVBUF);
1519 	msix->msix_alloc = 0;
1520 	return (0);
1521 }
1522 
1523 /*
1524  * Return the max supported MSI-X messages this device supports.
1525  * Basically, assuming the MD code can alloc messages, this function
1526  * should return the maximum value that pci_alloc_msix() can return.
1527  * Thus, it is subject to the tunables, etc.
1528  */
1529 int
1530 pci_msix_count_method(device_t dev, device_t child)
1531 {
1532 	struct pci_devinfo *dinfo = device_get_ivars(child);
1533 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534 
1535 	if (pci_do_msix && msix->msix_location != 0)
1536 		return (msix->msix_msgnum);
1537 	return (0);
1538 }
1539 
1540 /*
1541  * HyperTransport MSI mapping control
1542  */
1543 void
1544 pci_ht_map_msi(device_t dev, uint64_t addr)
1545 {
1546 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1547 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1548 
1549 	if (!ht->ht_msimap)
1550 		return;
1551 
1552 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1553 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1554 		/* Enable MSI -> HT mapping. */
1555 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1556 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1557 		    ht->ht_msictrl, 2);
1558 	}
1559 
1560 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1561 		/* Disable MSI -> HT mapping. */
1562 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1563 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1564 		    ht->ht_msictrl, 2);
1565 	}
1566 }
1567 
1568 /*
1569  * Support for MSI message signalled interrupts.
1570  */
1571 void
1572 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1573 {
1574 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1575 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1576 
1577 	/* Write data and address values. */
1578 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1579 	    address & 0xffffffff, 4);
1580 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1581 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1582 		    address >> 32, 4);
1583 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1584 		    data, 2);
1585 	} else
1586 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1587 		    2);
1588 
1589 	/* Enable MSI in the control register. */
1590 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1591 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1592 	    2);
1593 
1594 	/* Enable MSI -> HT mapping. */
1595 	pci_ht_map_msi(dev, address);
1596 }
1597 
1598 void
1599 pci_disable_msi(device_t dev)
1600 {
1601 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1602 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1603 
1604 	/* Disable MSI -> HT mapping. */
1605 	pci_ht_map_msi(dev, 0);
1606 
1607 	/* Disable MSI in the control register. */
1608 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1609 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1610 	    2);
1611 }
1612 
1613 /*
1614  * Restore MSI registers during resume.  If MSI is enabled then
1615  * restore the data and address registers in addition to the control
1616  * register.
1617  */
1618 static void
1619 pci_resume_msi(device_t dev)
1620 {
1621 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1622 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1623 	uint64_t address;
1624 	uint16_t data;
1625 
1626 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1627 		address = msi->msi_addr;
1628 		data = msi->msi_data;
1629 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1630 		    address & 0xffffffff, 4);
1631 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1632 			pci_write_config(dev, msi->msi_location +
1633 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1634 			pci_write_config(dev, msi->msi_location +
1635 			    PCIR_MSI_DATA_64BIT, data, 2);
1636 		} else
1637 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1638 			    data, 2);
1639 	}
1640 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1641 	    2);
1642 }
1643 
1644 int
1645 pci_remap_msi_irq(device_t dev, u_int irq)
1646 {
1647 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1648 	pcicfgregs *cfg = &dinfo->cfg;
1649 	struct resource_list_entry *rle;
1650 	struct msix_table_entry *mte;
1651 	struct msix_vector *mv;
1652 	device_t bus;
1653 	uint64_t addr;
1654 	uint32_t data;
1655 	int error, i, j;
1656 
1657 	bus = device_get_parent(dev);
1658 
1659 	/*
1660 	 * Handle MSI first.  We try to find this IRQ among our list
1661 	 * of MSI IRQs.  If we find it, we request updated address and
1662 	 * data registers and apply the results.
1663 	 */
1664 	if (cfg->msi.msi_alloc > 0) {
1665 
1666 		/* If we don't have any active handlers, nothing to do. */
1667 		if (cfg->msi.msi_handlers == 0)
1668 			return (0);
1669 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1670 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1671 			    i + 1);
1672 			if (rle->start == irq) {
1673 				error = PCIB_MAP_MSI(device_get_parent(bus),
1674 				    dev, irq, &addr, &data);
1675 				if (error)
1676 					return (error);
1677 				pci_disable_msi(dev);
1678 				dinfo->cfg.msi.msi_addr = addr;
1679 				dinfo->cfg.msi.msi_data = data;
1680 				pci_enable_msi(dev, addr, data);
1681 				return (0);
1682 			}
1683 		}
1684 		return (ENOENT);
1685 	}
1686 
1687 	/*
1688 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1689 	 * we request the updated mapping info.  If that works, we go
1690 	 * through all the slots that use this IRQ and update them.
1691 	 */
1692 	if (cfg->msix.msix_alloc > 0) {
1693 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1694 			mv = &cfg->msix.msix_vectors[i];
1695 			if (mv->mv_irq == irq) {
1696 				error = PCIB_MAP_MSI(device_get_parent(bus),
1697 				    dev, irq, &addr, &data);
1698 				if (error)
1699 					return (error);
1700 				mv->mv_address = addr;
1701 				mv->mv_data = data;
1702 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1703 					mte = &cfg->msix.msix_table[j];
1704 					if (mte->mte_vector != i + 1)
1705 						continue;
1706 					if (mte->mte_handlers == 0)
1707 						continue;
1708 					pci_mask_msix(dev, j);
1709 					pci_enable_msix(dev, j, addr, data);
1710 					pci_unmask_msix(dev, j);
1711 				}
1712 			}
1713 		}
1714 		return (ENOENT);
1715 	}
1716 
1717 	return (ENOENT);
1718 }
1719 
1720 /*
1721  * Returns true if the specified device is blacklisted because MSI
1722  * doesn't work.
1723  */
1724 int
1725 pci_msi_device_blacklisted(device_t dev)
1726 {
1727 	struct pci_quirk *q;
1728 
1729 	if (!pci_honor_msi_blacklist)
1730 		return (0);
1731 
1732 	for (q = &pci_quirks[0]; q->devid; q++) {
1733 		if (q->devid == pci_get_devid(dev) &&
1734 		    q->type == PCI_QUIRK_DISABLE_MSI)
1735 			return (1);
1736 	}
1737 	return (0);
1738 }
1739 
1740 /*
1741  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1742  * we just check for blacklisted chipsets as represented by the
1743  * host-PCI bridge at device 0:0:0.  In the future, it may become
1744  * necessary to check other system attributes, such as the kenv values
1745  * that give the motherboard manufacturer and model number.
1746  */
1747 static int
1748 pci_msi_blacklisted(void)
1749 {
1750 	device_t dev;
1751 
1752 	if (!pci_honor_msi_blacklist)
1753 		return (0);
1754 
1755 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1756 	if (!(pcie_chipset || pcix_chipset))
1757 		return (1);
1758 
1759 	dev = pci_find_bsf(0, 0, 0);
1760 	if (dev != NULL)
1761 		return (pci_msi_device_blacklisted(dev));
1762 	return (0);
1763 }
1764 
1765 /*
1766  * Attempt to allocate *count MSI messages.  The actual number allocated is
1767  * returned in *count.  After this function returns, each message will be
1768  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1769  */
1770 int
1771 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1772 {
1773 	struct pci_devinfo *dinfo = device_get_ivars(child);
1774 	pcicfgregs *cfg = &dinfo->cfg;
1775 	struct resource_list_entry *rle;
1776 	int actual, error, i, irqs[32];
1777 	uint16_t ctrl;
1778 
1779 	/* Don't let count == 0 get us into trouble. */
1780 	if (*count == 0)
1781 		return (EINVAL);
1782 
1783 	/* If rid 0 is allocated, then fail. */
1784 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1785 	if (rle != NULL && rle->res != NULL)
1786 		return (ENXIO);
1787 
1788 	/* Already have allocated messages? */
1789 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1790 		return (ENXIO);
1791 
1792 	/* If MSI is blacklisted for this system, fail. */
1793 	if (pci_msi_blacklisted())
1794 		return (ENXIO);
1795 
1796 	/* MSI capability present? */
1797 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1798 		return (ENODEV);
1799 
1800 	if (bootverbose)
1801 		device_printf(child,
1802 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1803 		    *count, cfg->msi.msi_msgnum);
1804 
1805 	/* Don't ask for more than the device supports. */
1806 	actual = min(*count, cfg->msi.msi_msgnum);
1807 
1808 	/* Don't ask for more than 32 messages. */
1809 	actual = min(actual, 32);
1810 
1811 	/* MSI requires power of 2 number of messages. */
1812 	if (!powerof2(actual))
1813 		return (EINVAL);
1814 
1815 	for (;;) {
1816 		/* Try to allocate N messages. */
1817 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1818 		    cfg->msi.msi_msgnum, irqs);
1819 		if (error == 0)
1820 			break;
1821 		if (actual == 1)
1822 			return (error);
1823 
1824 		/* Try N / 2. */
1825 		actual >>= 1;
1826 	}
1827 
1828 	/*
1829 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1830 	 * resources in the irqs[] array, so add new resources
1831 	 * starting at rid 1.
1832 	 */
1833 	for (i = 0; i < actual; i++)
1834 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1835 		    irqs[i], irqs[i], 1);
1836 
1837 	if (bootverbose) {
1838 		if (actual == 1)
1839 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1840 		else {
1841 			int run;
1842 
1843 			/*
1844 			 * Be fancy and try to print contiguous runs
1845 			 * of IRQ values as ranges.  'run' is true if
1846 			 * we are in a range.
1847 			 */
1848 			device_printf(child, "using IRQs %d", irqs[0]);
1849 			run = 0;
1850 			for (i = 1; i < actual; i++) {
1851 
1852 				/* Still in a run? */
1853 				if (irqs[i] == irqs[i - 1] + 1) {
1854 					run = 1;
1855 					continue;
1856 				}
1857 
1858 				/* Finish previous range. */
1859 				if (run) {
1860 					printf("-%d", irqs[i - 1]);
1861 					run = 0;
1862 				}
1863 
1864 				/* Start new range. */
1865 				printf(",%d", irqs[i]);
1866 			}
1867 
1868 			/* Unfinished range? */
1869 			if (run)
1870 				printf("-%d", irqs[actual - 1]);
1871 			printf(" for MSI\n");
1872 		}
1873 	}
1874 
1875 	/* Update control register with actual count. */
1876 	ctrl = cfg->msi.msi_ctrl;
1877 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1878 	ctrl |= (ffs(actual) - 1) << 4;
1879 	cfg->msi.msi_ctrl = ctrl;
1880 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1881 
1882 	/* Update counts of alloc'd messages. */
1883 	cfg->msi.msi_alloc = actual;
1884 	cfg->msi.msi_handlers = 0;
1885 	*count = actual;
1886 	return (0);
1887 }
1888 
1889 /* Release the MSI messages associated with this device. */
1890 int
1891 pci_release_msi_method(device_t dev, device_t child)
1892 {
1893 	struct pci_devinfo *dinfo = device_get_ivars(child);
1894 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1895 	struct resource_list_entry *rle;
1896 	int error, i, irqs[32];
1897 
1898 	/* Try MSI-X first. */
1899 	error = pci_release_msix(dev, child);
1900 	if (error != ENODEV)
1901 		return (error);
1902 
1903 	/* Do we have any messages to release? */
1904 	if (msi->msi_alloc == 0)
1905 		return (ENODEV);
1906 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1907 
1908 	/* Make sure none of the resources are allocated. */
1909 	if (msi->msi_handlers > 0)
1910 		return (EBUSY);
1911 	for (i = 0; i < msi->msi_alloc; i++) {
1912 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1913 		KASSERT(rle != NULL, ("missing MSI resource"));
1914 		if (rle->res != NULL)
1915 			return (EBUSY);
1916 		irqs[i] = rle->start;
1917 	}
1918 
1919 	/* Update control register with 0 count. */
1920 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1921 	    ("%s: MSI still enabled", __func__));
1922 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1923 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1924 	    msi->msi_ctrl, 2);
1925 
1926 	/* Release the messages. */
1927 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1928 	for (i = 0; i < msi->msi_alloc; i++)
1929 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1930 
1931 	/* Update alloc count. */
1932 	msi->msi_alloc = 0;
1933 	msi->msi_addr = 0;
1934 	msi->msi_data = 0;
1935 	return (0);
1936 }
1937 
1938 /*
1939  * Return the max supported MSI messages this device supports.
1940  * Basically, assuming the MD code can alloc messages, this function
1941  * should return the maximum value that pci_alloc_msi() can return.
1942  * Thus, it is subject to the tunables, etc.
1943  */
1944 int
1945 pci_msi_count_method(device_t dev, device_t child)
1946 {
1947 	struct pci_devinfo *dinfo = device_get_ivars(child);
1948 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1949 
1950 	if (pci_do_msi && msi->msi_location != 0)
1951 		return (msi->msi_msgnum);
1952 	return (0);
1953 }
1954 
1955 /* free pcicfgregs structure and all depending data structures */
1956 
1957 int
1958 pci_freecfg(struct pci_devinfo *dinfo)
1959 {
1960 	struct devlist *devlist_head;
1961 	int i;
1962 
1963 	devlist_head = &pci_devq;
1964 
1965 	if (dinfo->cfg.vpd.vpd_reg) {
1966 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1967 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1968 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1969 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1970 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1971 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1972 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1973 	}
1974 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1975 	free(dinfo, M_DEVBUF);
1976 
1977 	/* increment the generation count */
1978 	pci_generation++;
1979 
1980 	/* we're losing one device */
1981 	pci_numdevs--;
1982 	return (0);
1983 }
1984 
1985 /*
1986  * PCI power management
1987  */
1988 int
1989 pci_set_powerstate_method(device_t dev, device_t child, int state)
1990 {
1991 	struct pci_devinfo *dinfo = device_get_ivars(child);
1992 	pcicfgregs *cfg = &dinfo->cfg;
1993 	uint16_t status;
1994 	int result, oldstate, highest, delay;
1995 
1996 	if (cfg->pp.pp_cap == 0)
1997 		return (EOPNOTSUPP);
1998 
1999 	/*
2000 	 * Optimize a no state change request away.  While it would be OK to
2001 	 * write to the hardware in theory, some devices have shown odd
2002 	 * behavior when going from D3 -> D3.
2003 	 */
2004 	oldstate = pci_get_powerstate(child);
2005 	if (oldstate == state)
2006 		return (0);
2007 
2008 	/*
2009 	 * The PCI power management specification states that after a state
2010 	 * transition between PCI power states, system software must
2011 	 * guarantee a minimal delay before the function accesses the device.
2012 	 * Compute the worst case delay that we need to guarantee before we
2013 	 * access the device.  Many devices will be responsive much more
2014 	 * quickly than this delay, but there are some that don't respond
2015 	 * instantly to state changes.  Transitions to/from D3 state require
2016 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2017 	 * is done below with DELAY rather than a sleeper function because
2018 	 * this function can be called from contexts where we cannot sleep.
2019 	 */
2020 	highest = (oldstate > state) ? oldstate : state;
2021 	if (highest == PCI_POWERSTATE_D3)
2022 	    delay = 10000;
2023 	else if (highest == PCI_POWERSTATE_D2)
2024 	    delay = 200;
2025 	else
2026 	    delay = 0;
2027 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2028 	    & ~PCIM_PSTAT_DMASK;
2029 	result = 0;
2030 	switch (state) {
2031 	case PCI_POWERSTATE_D0:
2032 		status |= PCIM_PSTAT_D0;
2033 		break;
2034 	case PCI_POWERSTATE_D1:
2035 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2036 			return (EOPNOTSUPP);
2037 		status |= PCIM_PSTAT_D1;
2038 		break;
2039 	case PCI_POWERSTATE_D2:
2040 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2041 			return (EOPNOTSUPP);
2042 		status |= PCIM_PSTAT_D2;
2043 		break;
2044 	case PCI_POWERSTATE_D3:
2045 		status |= PCIM_PSTAT_D3;
2046 		break;
2047 	default:
2048 		return (EINVAL);
2049 	}
2050 
2051 	if (bootverbose)
2052 		printf(
2053 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2054 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2055 		    dinfo->cfg.func, oldstate, state);
2056 
2057 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2058 	if (delay)
2059 		DELAY(delay);
2060 	return (0);
2061 }
2062 
2063 int
2064 pci_get_powerstate_method(device_t dev, device_t child)
2065 {
2066 	struct pci_devinfo *dinfo = device_get_ivars(child);
2067 	pcicfgregs *cfg = &dinfo->cfg;
2068 	uint16_t status;
2069 	int result;
2070 
2071 	if (cfg->pp.pp_cap != 0) {
2072 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2073 		switch (status & PCIM_PSTAT_DMASK) {
2074 		case PCIM_PSTAT_D0:
2075 			result = PCI_POWERSTATE_D0;
2076 			break;
2077 		case PCIM_PSTAT_D1:
2078 			result = PCI_POWERSTATE_D1;
2079 			break;
2080 		case PCIM_PSTAT_D2:
2081 			result = PCI_POWERSTATE_D2;
2082 			break;
2083 		case PCIM_PSTAT_D3:
2084 			result = PCI_POWERSTATE_D3;
2085 			break;
2086 		default:
2087 			result = PCI_POWERSTATE_UNKNOWN;
2088 			break;
2089 		}
2090 	} else {
2091 		/* No support, device is always at D0 */
2092 		result = PCI_POWERSTATE_D0;
2093 	}
2094 	return (result);
2095 }
2096 
2097 /*
2098  * Some convenience functions for PCI device drivers.
2099  */
2100 
2101 static __inline void
2102 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2103 {
2104 	uint16_t	command;
2105 
2106 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2107 	command |= bit;
2108 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2109 }
2110 
2111 static __inline void
2112 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2113 {
2114 	uint16_t	command;
2115 
2116 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2117 	command &= ~bit;
2118 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2119 }
2120 
2121 int
2122 pci_enable_busmaster_method(device_t dev, device_t child)
2123 {
2124 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2125 	return (0);
2126 }
2127 
2128 int
2129 pci_disable_busmaster_method(device_t dev, device_t child)
2130 {
2131 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2132 	return (0);
2133 }
2134 
2135 int
2136 pci_enable_io_method(device_t dev, device_t child, int space)
2137 {
2138 	uint16_t command;
2139 	uint16_t bit;
2140 	char *error;
2141 
2142 	bit = 0;
2143 	error = NULL;
2144 
2145 	switch(space) {
2146 	case SYS_RES_IOPORT:
2147 		bit = PCIM_CMD_PORTEN;
2148 		error = "port";
2149 		break;
2150 	case SYS_RES_MEMORY:
2151 		bit = PCIM_CMD_MEMEN;
2152 		error = "memory";
2153 		break;
2154 	default:
2155 		return (EINVAL);
2156 	}
2157 	pci_set_command_bit(dev, child, bit);
2158 	/* Some devices seem to need a brief stall here, what do to? */
2159 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2160 	if (command & bit)
2161 		return (0);
2162 	device_printf(child, "failed to enable %s mapping!\n", error);
2163 	return (ENXIO);
2164 }
2165 
2166 int
2167 pci_disable_io_method(device_t dev, device_t child, int space)
2168 {
2169 	uint16_t command;
2170 	uint16_t bit;
2171 	char *error;
2172 
2173 	bit = 0;
2174 	error = NULL;
2175 
2176 	switch(space) {
2177 	case SYS_RES_IOPORT:
2178 		bit = PCIM_CMD_PORTEN;
2179 		error = "port";
2180 		break;
2181 	case SYS_RES_MEMORY:
2182 		bit = PCIM_CMD_MEMEN;
2183 		error = "memory";
2184 		break;
2185 	default:
2186 		return (EINVAL);
2187 	}
2188 	pci_clear_command_bit(dev, child, bit);
2189 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2190 	if (command & bit) {
2191 		device_printf(child, "failed to disable %s mapping!\n", error);
2192 		return (ENXIO);
2193 	}
2194 	return (0);
2195 }
2196 
2197 /*
2198  * New style pci driver.  Parent device is either a pci-host-bridge or a
2199  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2200  */
2201 
2202 void
2203 pci_print_verbose(struct pci_devinfo *dinfo)
2204 {
2205 
2206 	if (bootverbose) {
2207 		pcicfgregs *cfg = &dinfo->cfg;
2208 
2209 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2210 		    cfg->vendor, cfg->device, cfg->revid);
2211 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2212 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2213 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2214 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2215 		    cfg->mfdev);
2216 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2217 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2218 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2219 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2220 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2221 		if (cfg->intpin > 0)
2222 			printf("\tintpin=%c, irq=%d\n",
2223 			    cfg->intpin +'a' -1, cfg->intline);
2224 		if (cfg->pp.pp_cap) {
2225 			uint16_t status;
2226 
2227 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2228 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2229 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2230 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2231 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2232 			    status & PCIM_PSTAT_DMASK);
2233 		}
2234 		if (cfg->msi.msi_location) {
2235 			int ctrl;
2236 
2237 			ctrl = cfg->msi.msi_ctrl;
2238 			printf("\tMSI supports %d message%s%s%s\n",
2239 			    cfg->msi.msi_msgnum,
2240 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2241 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2242 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2243 		}
2244 		if (cfg->msix.msix_location) {
2245 			printf("\tMSI-X supports %d message%s ",
2246 			    cfg->msix.msix_msgnum,
2247 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2248 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2249 				printf("in map 0x%x\n",
2250 				    cfg->msix.msix_table_bar);
2251 			else
2252 				printf("in maps 0x%x and 0x%x\n",
2253 				    cfg->msix.msix_table_bar,
2254 				    cfg->msix.msix_pba_bar);
2255 		}
2256 	}
2257 }
2258 
2259 static int
2260 pci_porten(device_t pcib, int b, int s, int f)
2261 {
2262 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2263 		& PCIM_CMD_PORTEN) != 0;
2264 }
2265 
2266 static int
2267 pci_memen(device_t pcib, int b, int s, int f)
2268 {
2269 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2270 		& PCIM_CMD_MEMEN) != 0;
2271 }
2272 
2273 /*
2274  * Add a resource based on a pci map register. Return 1 if the map
2275  * register is a 32bit map register or 2 if it is a 64bit register.
2276  */
2277 static int
2278 pci_add_map(device_t pcib, device_t bus, device_t dev,
2279     int b, int s, int f, int reg, struct resource_list *rl, int force,
2280     int prefetch)
2281 {
2282 	pci_addr_t base, map;
2283 	pci_addr_t start, end, count;
2284 	uint8_t ln2size;
2285 	uint8_t ln2range;
2286 	uint32_t testval;
2287 	uint16_t cmd;
2288 	int type;
2289 	int barlen;
2290 	struct resource *res;
2291 
2292 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2293 	ln2range = pci_maprange(map);
2294 	if (ln2range == 64)
2295 		map |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) <<
2296 		    32;
2297 
2298 	/*
2299 	 * Disable decoding via the command register before
2300 	 * determining the BAR's length since we will be placing it in
2301 	 * a weird state.
2302 	 */
2303 	cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2304 	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND,
2305 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2306 
2307 	/*
2308 	 * Determine the BAR's length by writing all 1's.  The bottom
2309 	 * log_2(size) bits of the BAR will stick as 0 when we read
2310 	 * the value back.
2311 	 */
2312 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2313 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2314 	if (ln2range == 64) {
2315 		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, 0xffffffff, 4);
2316 		testval |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4,
2317 		    4) << 32;
2318 	}
2319 
2320 	/* Restore the BAR and command register. */
2321 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2322 	if (ln2range == 64)
2323 		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, map >> 32, 4);
2324 	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2325 
2326 	if (PCI_BAR_MEM(map)) {
2327 		type = SYS_RES_MEMORY;
2328 		if (map & PCIM_BAR_MEM_PREFETCH)
2329 			prefetch = 1;
2330 	} else
2331 		type = SYS_RES_IOPORT;
2332 	ln2size = pci_mapsize(testval);
2333 	base = pci_mapbase(map);
2334 	barlen = ln2range == 64 ? 2 : 1;
2335 
2336 	/*
2337 	 * For I/O registers, if bottom bit is set, and the next bit up
2338 	 * isn't clear, we know we have a BAR that doesn't conform to the
2339 	 * spec, so ignore it.  Also, sanity check the size of the data
2340 	 * areas to the type of memory involved.  Memory must be at least
2341 	 * 16 bytes in size, while I/O ranges must be at least 4.
2342 	 */
2343 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2344 		return (barlen);
2345 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2346 	    (type == SYS_RES_IOPORT && ln2size < 2))
2347 		return (barlen);
2348 
2349 	if (bootverbose) {
2350 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2351 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2352 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2353 			printf(", port disabled\n");
2354 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2355 			printf(", memory disabled\n");
2356 		else
2357 			printf(", enabled\n");
2358 	}
2359 
2360 	/*
2361 	 * If base is 0, then we have problems.  It is best to ignore
2362 	 * such entries for the moment.  These will be allocated later if
2363 	 * the driver specifically requests them.  However, some
2364 	 * removable busses look better when all resources are allocated,
2365 	 * so allow '0' to be overriden.
2366 	 *
2367 	 * Similarly treat maps whose values is the same as the test value
2368 	 * read back.  These maps have had all f's written to them by the
2369 	 * BIOS in an attempt to disable the resources.
2370 	 */
2371 	if (!force && (base == 0 || map == testval))
2372 		return (barlen);
2373 	if ((u_long)base != base) {
2374 		device_printf(bus,
2375 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2376 		    pci_get_domain(dev), b, s, f, reg);
2377 		return (barlen);
2378 	}
2379 
2380 	/*
2381 	 * This code theoretically does the right thing, but has
2382 	 * undesirable side effects in some cases where peripherals
2383 	 * respond oddly to having these bits enabled.  Let the user
2384 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2385 	 * default).
2386 	 */
2387 	if (pci_enable_io_modes) {
2388 		/* Turn on resources that have been left off by a lazy BIOS */
2389 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2390 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2391 			cmd |= PCIM_CMD_PORTEN;
2392 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2393 		}
2394 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2395 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2396 			cmd |= PCIM_CMD_MEMEN;
2397 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2398 		}
2399 	} else {
2400 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2401 			return (barlen);
2402 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2403 			return (barlen);
2404 	}
2405 
2406 	count = 1 << ln2size;
2407 	if (base == 0 || base == pci_mapbase(testval)) {
2408 		start = 0;	/* Let the parent decide. */
2409 		end = ~0ULL;
2410 	} else {
2411 		start = base;
2412 		end = base + (1 << ln2size) - 1;
2413 	}
2414 	resource_list_add(rl, type, reg, start, end, count);
2415 
2416 	/*
2417 	 * Try to allocate the resource for this BAR from our parent
2418 	 * so that this resource range is already reserved.  The
2419 	 * driver for this device will later inherit this resource in
2420 	 * pci_alloc_resource().
2421 	 */
2422 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2423 	    prefetch ? RF_PREFETCHABLE : 0);
2424 	if (res == NULL) {
2425 		/*
2426 		 * If the allocation fails, clear the BAR and delete
2427 		 * the resource list entry to force
2428 		 * pci_alloc_resource() to allocate resources from the
2429 		 * parent.
2430 		 */
2431 		resource_list_delete(rl, type, reg);
2432 		start = 0;
2433 	} else {
2434 		start = rman_get_start(res);
2435 		rman_set_device(res, bus);
2436 	}
2437 	pci_write_config(dev, reg, start, 4);
2438 	if (ln2range == 64)
2439 		pci_write_config(dev, reg + 4, start >> 32, 4);
2440 	return (barlen);
2441 }
2442 
2443 /*
2444  * For ATA devices we need to decide early what addressing mode to use.
2445  * Legacy demands that the primary and secondary ATA ports sits on the
2446  * same addresses that old ISA hardware did. This dictates that we use
2447  * those addresses and ignore the BAR's if we cannot set PCI native
2448  * addressing mode.
2449  */
2450 static void
2451 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2452     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2453 {
2454 	struct resource *r;
2455 	int rid, type, progif;
2456 #if 0
2457 	/* if this device supports PCI native addressing use it */
2458 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2459 	if ((progif & 0x8a) == 0x8a) {
2460 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2461 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2462 			printf("Trying ATA native PCI addressing mode\n");
2463 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2464 		}
2465 	}
2466 #endif
2467 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2468 	type = SYS_RES_IOPORT;
2469 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2470 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2471 		    prefetchmask & (1 << 0));
2472 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2473 		    prefetchmask & (1 << 1));
2474 	} else {
2475 		rid = PCIR_BAR(0);
2476 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2477 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2478 		    8, 0);
2479 		rman_set_device(r, bus);
2480 		rid = PCIR_BAR(1);
2481 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2482 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2483 		    1, 0);
2484 		rman_set_device(r, bus);
2485 	}
2486 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2487 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2488 		    prefetchmask & (1 << 2));
2489 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2490 		    prefetchmask & (1 << 3));
2491 	} else {
2492 		rid = PCIR_BAR(2);
2493 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2494 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2495 		    8, 0);
2496 		rman_set_device(r, bus);
2497 		rid = PCIR_BAR(3);
2498 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2499 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2500 		    1, 0);
2501 		rman_set_device(r, bus);
2502 	}
2503 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2504 	    prefetchmask & (1 << 4));
2505 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2506 	    prefetchmask & (1 << 5));
2507 }
2508 
2509 static void
2510 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2511 {
2512 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2513 	pcicfgregs *cfg = &dinfo->cfg;
2514 	char tunable_name[64];
2515 	int irq;
2516 
2517 	/* Has to have an intpin to have an interrupt. */
2518 	if (cfg->intpin == 0)
2519 		return;
2520 
2521 	/* Let the user override the IRQ with a tunable. */
2522 	irq = PCI_INVALID_IRQ;
2523 	snprintf(tunable_name, sizeof(tunable_name),
2524 	    "hw.pci%d.%d.%d.INT%c.irq",
2525 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2526 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2527 		irq = PCI_INVALID_IRQ;
2528 
2529 	/*
2530 	 * If we didn't get an IRQ via the tunable, then we either use the
2531 	 * IRQ value in the intline register or we ask the bus to route an
2532 	 * interrupt for us.  If force_route is true, then we only use the
2533 	 * value in the intline register if the bus was unable to assign an
2534 	 * IRQ.
2535 	 */
2536 	if (!PCI_INTERRUPT_VALID(irq)) {
2537 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2538 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2539 		if (!PCI_INTERRUPT_VALID(irq))
2540 			irq = cfg->intline;
2541 	}
2542 
2543 	/* If after all that we don't have an IRQ, just bail. */
2544 	if (!PCI_INTERRUPT_VALID(irq))
2545 		return;
2546 
2547 	/* Update the config register if it changed. */
2548 	if (irq != cfg->intline) {
2549 		cfg->intline = irq;
2550 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2551 	}
2552 
2553 	/* Add this IRQ as rid 0 interrupt resource. */
2554 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2555 }
2556 
2557 void
2558 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2559 {
2560 	device_t pcib;
2561 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2562 	pcicfgregs *cfg = &dinfo->cfg;
2563 	struct resource_list *rl = &dinfo->resources;
2564 	struct pci_quirk *q;
2565 	int b, i, f, s;
2566 
2567 	pcib = device_get_parent(bus);
2568 
2569 	b = cfg->bus;
2570 	s = cfg->slot;
2571 	f = cfg->func;
2572 
2573 	/* ATA devices needs special map treatment */
2574 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2575 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2576 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2577 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2578 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2579 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2580 	else
2581 		for (i = 0; i < cfg->nummaps;)
2582 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2583 			    rl, force, prefetchmask & (1 << i));
2584 
2585 	/*
2586 	 * Add additional, quirked resources.
2587 	 */
2588 	for (q = &pci_quirks[0]; q->devid; q++) {
2589 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2590 		    && q->type == PCI_QUIRK_MAP_REG)
2591 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2592 			  force, 0);
2593 	}
2594 
2595 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2596 #ifdef __PCI_REROUTE_INTERRUPT
2597 		/*
2598 		 * Try to re-route interrupts. Sometimes the BIOS or
2599 		 * firmware may leave bogus values in these registers.
2600 		 * If the re-route fails, then just stick with what we
2601 		 * have.
2602 		 */
2603 		pci_assign_interrupt(bus, dev, 1);
2604 #else
2605 		pci_assign_interrupt(bus, dev, 0);
2606 #endif
2607 	}
2608 }
2609 
2610 void
2611 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2612 {
2613 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2614 	device_t pcib = device_get_parent(dev);
2615 	struct pci_devinfo *dinfo;
2616 	int maxslots;
2617 	int s, f, pcifunchigh;
2618 	uint8_t hdrtype;
2619 
2620 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2621 	    ("dinfo_size too small"));
2622 	maxslots = PCIB_MAXSLOTS(pcib);
2623 	for (s = 0; s <= maxslots; s++) {
2624 		pcifunchigh = 0;
2625 		f = 0;
2626 		DELAY(1);
2627 		hdrtype = REG(PCIR_HDRTYPE, 1);
2628 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2629 			continue;
2630 		if (hdrtype & PCIM_MFDEV)
2631 			pcifunchigh = PCI_FUNCMAX;
2632 		for (f = 0; f <= pcifunchigh; f++) {
2633 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2634 			    dinfo_size);
2635 			if (dinfo != NULL) {
2636 				pci_add_child(dev, dinfo);
2637 			}
2638 		}
2639 	}
2640 #undef REG
2641 }
2642 
2643 void
2644 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2645 {
2646 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2647 	device_set_ivars(dinfo->cfg.dev, dinfo);
2648 	resource_list_init(&dinfo->resources);
2649 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2650 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2651 	pci_print_verbose(dinfo);
2652 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2653 }
2654 
2655 static int
2656 pci_probe(device_t dev)
2657 {
2658 
2659 	device_set_desc(dev, "PCI bus");
2660 
2661 	/* Allow other subclasses to override this driver. */
2662 	return (BUS_PROBE_GENERIC);
2663 }
2664 
2665 static int
2666 pci_attach(device_t dev)
2667 {
2668 	int busno, domain;
2669 
2670 	/*
2671 	 * Since there can be multiple independantly numbered PCI
2672 	 * busses on systems with multiple PCI domains, we can't use
2673 	 * the unit number to decide which bus we are probing. We ask
2674 	 * the parent pcib what our domain and bus numbers are.
2675 	 */
2676 	domain = pcib_get_domain(dev);
2677 	busno = pcib_get_bus(dev);
2678 	if (bootverbose)
2679 		device_printf(dev, "domain=%d, physical bus=%d\n",
2680 		    domain, busno);
2681 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2682 	return (bus_generic_attach(dev));
2683 }
2684 
2685 int
2686 pci_suspend(device_t dev)
2687 {
2688 	int dstate, error, i, numdevs;
2689 	device_t acpi_dev, child, *devlist;
2690 	struct pci_devinfo *dinfo;
2691 
2692 	/*
2693 	 * Save the PCI configuration space for each child and set the
2694 	 * device in the appropriate power state for this sleep state.
2695 	 */
2696 	acpi_dev = NULL;
2697 	if (pci_do_power_resume)
2698 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2699 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2700 		return (error);
2701 	for (i = 0; i < numdevs; i++) {
2702 		child = devlist[i];
2703 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2704 		pci_cfg_save(child, dinfo, 0);
2705 	}
2706 
2707 	/* Suspend devices before potentially powering them down. */
2708 	error = bus_generic_suspend(dev);
2709 	if (error) {
2710 		free(devlist, M_TEMP);
2711 		return (error);
2712 	}
2713 
2714 	/*
2715 	 * Always set the device to D3.  If ACPI suggests a different
2716 	 * power state, use it instead.  If ACPI is not present, the
2717 	 * firmware is responsible for managing device power.  Skip
2718 	 * children who aren't attached since they are powered down
2719 	 * separately.  Only manage type 0 devices for now.
2720 	 */
2721 	for (i = 0; acpi_dev && i < numdevs; i++) {
2722 		child = devlist[i];
2723 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2724 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2725 			dstate = PCI_POWERSTATE_D3;
2726 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2727 			pci_set_powerstate(child, dstate);
2728 		}
2729 	}
2730 	free(devlist, M_TEMP);
2731 	return (0);
2732 }
2733 
2734 int
2735 pci_resume(device_t dev)
2736 {
2737 	int i, numdevs, error;
2738 	device_t acpi_dev, child, *devlist;
2739 	struct pci_devinfo *dinfo;
2740 
2741 	/*
2742 	 * Set each child to D0 and restore its PCI configuration space.
2743 	 */
2744 	acpi_dev = NULL;
2745 	if (pci_do_power_resume)
2746 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2747 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2748 		return (error);
2749 	for (i = 0; i < numdevs; i++) {
2750 		/*
2751 		 * Notify ACPI we're going to D0 but ignore the result.  If
2752 		 * ACPI is not present, the firmware is responsible for
2753 		 * managing device power.  Only manage type 0 devices for now.
2754 		 */
2755 		child = devlist[i];
2756 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2757 		if (acpi_dev && device_is_attached(child) &&
2758 		    dinfo->cfg.hdrtype == 0) {
2759 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2760 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2761 		}
2762 
2763 		/* Now the device is powered up, restore its config space. */
2764 		pci_cfg_restore(child, dinfo);
2765 	}
2766 	free(devlist, M_TEMP);
2767 	return (bus_generic_resume(dev));
2768 }
2769 
2770 static void
2771 pci_load_vendor_data(void)
2772 {
2773 	caddr_t vendordata, info;
2774 
2775 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2776 		info = preload_search_info(vendordata, MODINFO_ADDR);
2777 		pci_vendordata = *(char **)info;
2778 		info = preload_search_info(vendordata, MODINFO_SIZE);
2779 		pci_vendordata_size = *(size_t *)info;
2780 		/* terminate the database */
2781 		pci_vendordata[pci_vendordata_size] = '\n';
2782 	}
2783 }
2784 
2785 void
2786 pci_driver_added(device_t dev, driver_t *driver)
2787 {
2788 	int numdevs;
2789 	device_t *devlist;
2790 	device_t child;
2791 	struct pci_devinfo *dinfo;
2792 	int i;
2793 
2794 	if (bootverbose)
2795 		device_printf(dev, "driver added\n");
2796 	DEVICE_IDENTIFY(driver, dev);
2797 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2798 		return;
2799 	for (i = 0; i < numdevs; i++) {
2800 		child = devlist[i];
2801 		if (device_get_state(child) != DS_NOTPRESENT)
2802 			continue;
2803 		dinfo = device_get_ivars(child);
2804 		pci_print_verbose(dinfo);
2805 		if (bootverbose)
2806 			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
2807 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2808 			    dinfo->cfg.func);
2809 		pci_cfg_restore(child, dinfo);
2810 		if (device_probe_and_attach(child) != 0)
2811 			pci_cfg_save(child, dinfo, 1);
2812 	}
2813 	free(devlist, M_TEMP);
2814 }
2815 
2816 int
2817 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2818     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2819 {
2820 	struct pci_devinfo *dinfo;
2821 	struct msix_table_entry *mte;
2822 	struct msix_vector *mv;
2823 	uint64_t addr;
2824 	uint32_t data;
2825 	void *cookie;
2826 	int error, rid;
2827 
2828 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2829 	    arg, &cookie);
2830 	if (error)
2831 		return (error);
2832 
2833 	/* If this is not a direct child, just bail out. */
2834 	if (device_get_parent(child) != dev) {
2835 		*cookiep = cookie;
2836 		return(0);
2837 	}
2838 
2839 	rid = rman_get_rid(irq);
2840 	if (rid == 0) {
2841 		/* Make sure that INTx is enabled */
2842 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
2843 	} else {
2844 		/*
2845 		 * Check to see if the interrupt is MSI or MSI-X.
2846 		 * Ask our parent to map the MSI and give
2847 		 * us the address and data register values.
2848 		 * If we fail for some reason, teardown the
2849 		 * interrupt handler.
2850 		 */
2851 		dinfo = device_get_ivars(child);
2852 		if (dinfo->cfg.msi.msi_alloc > 0) {
2853 			if (dinfo->cfg.msi.msi_addr == 0) {
2854 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2855 			    ("MSI has handlers, but vectors not mapped"));
2856 				error = PCIB_MAP_MSI(device_get_parent(dev),
2857 				    child, rman_get_start(irq), &addr, &data);
2858 				if (error)
2859 					goto bad;
2860 				dinfo->cfg.msi.msi_addr = addr;
2861 				dinfo->cfg.msi.msi_data = data;
2862 				pci_enable_msi(child, addr, data);
2863 			}
2864 			dinfo->cfg.msi.msi_handlers++;
2865 		} else {
2866 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2867 			    ("No MSI or MSI-X interrupts allocated"));
2868 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2869 			    ("MSI-X index too high"));
2870 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2871 			KASSERT(mte->mte_vector != 0, ("no message vector"));
2872 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2873 			KASSERT(mv->mv_irq == rman_get_start(irq),
2874 			    ("IRQ mismatch"));
2875 			if (mv->mv_address == 0) {
2876 				KASSERT(mte->mte_handlers == 0,
2877 		    ("MSI-X table entry has handlers, but vector not mapped"));
2878 				error = PCIB_MAP_MSI(device_get_parent(dev),
2879 				    child, rman_get_start(irq), &addr, &data);
2880 				if (error)
2881 					goto bad;
2882 				mv->mv_address = addr;
2883 				mv->mv_data = data;
2884 			}
2885 			if (mte->mte_handlers == 0) {
2886 				pci_enable_msix(child, rid - 1, mv->mv_address,
2887 				    mv->mv_data);
2888 				pci_unmask_msix(child, rid - 1);
2889 			}
2890 			mte->mte_handlers++;
2891 		}
2892 
2893 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
2894 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2895 	bad:
2896 		if (error) {
2897 			(void)bus_generic_teardown_intr(dev, child, irq,
2898 			    cookie);
2899 			return (error);
2900 		}
2901 	}
2902 	*cookiep = cookie;
2903 	return (0);
2904 }
2905 
2906 int
2907 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2908     void *cookie)
2909 {
2910 	struct msix_table_entry *mte;
2911 	struct resource_list_entry *rle;
2912 	struct pci_devinfo *dinfo;
2913 	int error, rid;
2914 
2915 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2916 		return (EINVAL);
2917 
2918 	/* If this isn't a direct child, just bail out */
2919 	if (device_get_parent(child) != dev)
2920 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2921 
2922 	rid = rman_get_rid(irq);
2923 	if (rid > 0) {
2924 		/* Mask INTx */
2925 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2926 	} else {
2927 		/*
2928 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2929 		 * decrement the appropriate handlers count and mask the
2930 		 * MSI-X message, or disable MSI messages if the count
2931 		 * drops to 0.
2932 		 */
2933 		dinfo = device_get_ivars(child);
2934 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2935 		if (rle->res != irq)
2936 			return (EINVAL);
2937 		if (dinfo->cfg.msi.msi_alloc > 0) {
2938 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2939 			    ("MSI-X index too high"));
2940 			if (dinfo->cfg.msi.msi_handlers == 0)
2941 				return (EINVAL);
2942 			dinfo->cfg.msi.msi_handlers--;
2943 			if (dinfo->cfg.msi.msi_handlers == 0)
2944 				pci_disable_msi(child);
2945 		} else {
2946 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2947 			    ("No MSI or MSI-X interrupts allocated"));
2948 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2949 			    ("MSI-X index too high"));
2950 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2951 			if (mte->mte_handlers == 0)
2952 				return (EINVAL);
2953 			mte->mte_handlers--;
2954 			if (mte->mte_handlers == 0)
2955 				pci_mask_msix(child, rid - 1);
2956 		}
2957 	}
2958 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2959 	if (rid > 0)
2960 		KASSERT(error == 0,
2961 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2962 	return (error);
2963 }
2964 
2965 int
2966 pci_print_child(device_t dev, device_t child)
2967 {
2968 	struct pci_devinfo *dinfo;
2969 	struct resource_list *rl;
2970 	int retval = 0;
2971 
2972 	dinfo = device_get_ivars(child);
2973 	rl = &dinfo->resources;
2974 
2975 	retval += bus_print_child_header(dev, child);
2976 
2977 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2978 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2979 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2980 	if (device_get_flags(dev))
2981 		retval += printf(" flags %#x", device_get_flags(dev));
2982 
2983 	retval += printf(" at device %d.%d", pci_get_slot(child),
2984 	    pci_get_function(child));
2985 
2986 	retval += bus_print_child_footer(dev, child);
2987 
2988 	return (retval);
2989 }
2990 
2991 static struct
2992 {
2993 	int	class;
2994 	int	subclass;
2995 	char	*desc;
2996 } pci_nomatch_tab[] = {
2997 	{PCIC_OLD,		-1,			"old"},
2998 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
2999 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3000 	{PCIC_STORAGE,		-1,			"mass storage"},
3001 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3002 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3003 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3004 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3005 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3006 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3007 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3008 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3009 	{PCIC_NETWORK,		-1,			"network"},
3010 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3011 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3012 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3013 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3014 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3015 	{PCIC_DISPLAY,		-1,			"display"},
3016 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3017 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3018 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3019 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3020 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3021 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3022 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3023 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3024 	{PCIC_MEMORY,		-1,			"memory"},
3025 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3026 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3027 	{PCIC_BRIDGE,		-1,			"bridge"},
3028 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3029 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3030 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3031 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3032 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3033 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3034 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3035 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3036 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3037 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3038 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3039 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3040 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3041 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3042 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3043 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3044 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3045 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3046 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3047 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3048 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3049 	{PCIC_INPUTDEV,		-1,			"input device"},
3050 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3051 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3052 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3053 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3054 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3055 	{PCIC_DOCKING,		-1,			"docking station"},
3056 	{PCIC_PROCESSOR,	-1,			"processor"},
3057 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3058 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3059 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3060 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3061 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3062 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3063 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3064 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3065 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3066 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3067 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3068 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3069 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3070 	{PCIC_SATCOM,		-1,			"satellite communication"},
3071 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3072 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3073 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3074 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3075 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3076 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3077 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3078 	{PCIC_DASP,		-1,			"dasp"},
3079 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3080 	{0, 0,		NULL}
3081 };
3082 
3083 void
3084 pci_probe_nomatch(device_t dev, device_t child)
3085 {
3086 	int	i;
3087 	char	*cp, *scp, *device;
3088 
3089 	/*
3090 	 * Look for a listing for this device in a loaded device database.
3091 	 */
3092 	if ((device = pci_describe_device(child)) != NULL) {
3093 		device_printf(dev, "<%s>", device);
3094 		free(device, M_DEVBUF);
3095 	} else {
3096 		/*
3097 		 * Scan the class/subclass descriptions for a general
3098 		 * description.
3099 		 */
3100 		cp = "unknown";
3101 		scp = NULL;
3102 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3103 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3104 				if (pci_nomatch_tab[i].subclass == -1) {
3105 					cp = pci_nomatch_tab[i].desc;
3106 				} else if (pci_nomatch_tab[i].subclass ==
3107 				    pci_get_subclass(child)) {
3108 					scp = pci_nomatch_tab[i].desc;
3109 				}
3110 			}
3111 		}
3112 		device_printf(dev, "<%s%s%s>",
3113 		    cp ? cp : "",
3114 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3115 		    scp ? scp : "");
3116 	}
3117 	printf(" at device %d.%d (no driver attached)\n",
3118 	    pci_get_slot(child), pci_get_function(child));
3119 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3120 	return;
3121 }
3122 
3123 /*
3124  * Parse the PCI device database, if loaded, and return a pointer to a
3125  * description of the device.
3126  *
3127  * The database is flat text formatted as follows:
3128  *
3129  * Any line not in a valid format is ignored.
3130  * Lines are terminated with newline '\n' characters.
3131  *
3132  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3133  * the vendor name.
3134  *
3135  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3136  * - devices cannot be listed without a corresponding VENDOR line.
3137  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3138  * another TAB, then the device name.
3139  */
3140 
3141 /*
3142  * Assuming (ptr) points to the beginning of a line in the database,
3143  * return the vendor or device and description of the next entry.
3144  * The value of (vendor) or (device) inappropriate for the entry type
3145  * is set to -1.  Returns nonzero at the end of the database.
3146  *
3147  * Note that this is slightly unrobust in the face of corrupt data;
3148  * we attempt to safeguard against this by spamming the end of the
3149  * database with a newline when we initialise.
3150  */
3151 static int
3152 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3153 {
3154 	char	*cp = *ptr;
3155 	int	left;
3156 
3157 	*device = -1;
3158 	*vendor = -1;
3159 	**desc = '\0';
3160 	for (;;) {
3161 		left = pci_vendordata_size - (cp - pci_vendordata);
3162 		if (left <= 0) {
3163 			*ptr = cp;
3164 			return(1);
3165 		}
3166 
3167 		/* vendor entry? */
3168 		if (*cp != '\t' &&
3169 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3170 			break;
3171 		/* device entry? */
3172 		if (*cp == '\t' &&
3173 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3174 			break;
3175 
3176 		/* skip to next line */
3177 		while (*cp != '\n' && left > 0) {
3178 			cp++;
3179 			left--;
3180 		}
3181 		if (*cp == '\n') {
3182 			cp++;
3183 			left--;
3184 		}
3185 	}
3186 	/* skip to next line */
3187 	while (*cp != '\n' && left > 0) {
3188 		cp++;
3189 		left--;
3190 	}
3191 	if (*cp == '\n' && left > 0)
3192 		cp++;
3193 	*ptr = cp;
3194 	return(0);
3195 }
3196 
3197 static char *
3198 pci_describe_device(device_t dev)
3199 {
3200 	int	vendor, device;
3201 	char	*desc, *vp, *dp, *line;
3202 
3203 	desc = vp = dp = NULL;
3204 
3205 	/*
3206 	 * If we have no vendor data, we can't do anything.
3207 	 */
3208 	if (pci_vendordata == NULL)
3209 		goto out;
3210 
3211 	/*
3212 	 * Scan the vendor data looking for this device
3213 	 */
3214 	line = pci_vendordata;
3215 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3216 		goto out;
3217 	for (;;) {
3218 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3219 			goto out;
3220 		if (vendor == pci_get_vendor(dev))
3221 			break;
3222 	}
3223 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3224 		goto out;
3225 	for (;;) {
3226 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3227 			*dp = 0;
3228 			break;
3229 		}
3230 		if (vendor != -1) {
3231 			*dp = 0;
3232 			break;
3233 		}
3234 		if (device == pci_get_device(dev))
3235 			break;
3236 	}
3237 	if (dp[0] == '\0')
3238 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3239 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3240 	    NULL)
3241 		sprintf(desc, "%s, %s", vp, dp);
3242  out:
3243 	if (vp != NULL)
3244 		free(vp, M_DEVBUF);
3245 	if (dp != NULL)
3246 		free(dp, M_DEVBUF);
3247 	return(desc);
3248 }
3249 
3250 int
3251 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3252 {
3253 	struct pci_devinfo *dinfo;
3254 	pcicfgregs *cfg;
3255 
3256 	dinfo = device_get_ivars(child);
3257 	cfg = &dinfo->cfg;
3258 
3259 	switch (which) {
3260 	case PCI_IVAR_ETHADDR:
3261 		/*
3262 		 * The generic accessor doesn't deal with failure, so
3263 		 * we set the return value, then return an error.
3264 		 */
3265 		*((uint8_t **) result) = NULL;
3266 		return (EINVAL);
3267 	case PCI_IVAR_SUBVENDOR:
3268 		*result = cfg->subvendor;
3269 		break;
3270 	case PCI_IVAR_SUBDEVICE:
3271 		*result = cfg->subdevice;
3272 		break;
3273 	case PCI_IVAR_VENDOR:
3274 		*result = cfg->vendor;
3275 		break;
3276 	case PCI_IVAR_DEVICE:
3277 		*result = cfg->device;
3278 		break;
3279 	case PCI_IVAR_DEVID:
3280 		*result = (cfg->device << 16) | cfg->vendor;
3281 		break;
3282 	case PCI_IVAR_CLASS:
3283 		*result = cfg->baseclass;
3284 		break;
3285 	case PCI_IVAR_SUBCLASS:
3286 		*result = cfg->subclass;
3287 		break;
3288 	case PCI_IVAR_PROGIF:
3289 		*result = cfg->progif;
3290 		break;
3291 	case PCI_IVAR_REVID:
3292 		*result = cfg->revid;
3293 		break;
3294 	case PCI_IVAR_INTPIN:
3295 		*result = cfg->intpin;
3296 		break;
3297 	case PCI_IVAR_IRQ:
3298 		*result = cfg->intline;
3299 		break;
3300 	case PCI_IVAR_DOMAIN:
3301 		*result = cfg->domain;
3302 		break;
3303 	case PCI_IVAR_BUS:
3304 		*result = cfg->bus;
3305 		break;
3306 	case PCI_IVAR_SLOT:
3307 		*result = cfg->slot;
3308 		break;
3309 	case PCI_IVAR_FUNCTION:
3310 		*result = cfg->func;
3311 		break;
3312 	case PCI_IVAR_CMDREG:
3313 		*result = cfg->cmdreg;
3314 		break;
3315 	case PCI_IVAR_CACHELNSZ:
3316 		*result = cfg->cachelnsz;
3317 		break;
3318 	case PCI_IVAR_MINGNT:
3319 		*result = cfg->mingnt;
3320 		break;
3321 	case PCI_IVAR_MAXLAT:
3322 		*result = cfg->maxlat;
3323 		break;
3324 	case PCI_IVAR_LATTIMER:
3325 		*result = cfg->lattimer;
3326 		break;
3327 	default:
3328 		return (ENOENT);
3329 	}
3330 	return (0);
3331 }
3332 
3333 int
3334 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3335 {
3336 	struct pci_devinfo *dinfo;
3337 
3338 	dinfo = device_get_ivars(child);
3339 
3340 	switch (which) {
3341 	case PCI_IVAR_INTPIN:
3342 		dinfo->cfg.intpin = value;
3343 		return (0);
3344 	case PCI_IVAR_ETHADDR:
3345 	case PCI_IVAR_SUBVENDOR:
3346 	case PCI_IVAR_SUBDEVICE:
3347 	case PCI_IVAR_VENDOR:
3348 	case PCI_IVAR_DEVICE:
3349 	case PCI_IVAR_DEVID:
3350 	case PCI_IVAR_CLASS:
3351 	case PCI_IVAR_SUBCLASS:
3352 	case PCI_IVAR_PROGIF:
3353 	case PCI_IVAR_REVID:
3354 	case PCI_IVAR_IRQ:
3355 	case PCI_IVAR_DOMAIN:
3356 	case PCI_IVAR_BUS:
3357 	case PCI_IVAR_SLOT:
3358 	case PCI_IVAR_FUNCTION:
3359 		return (EINVAL);	/* disallow for now */
3360 
3361 	default:
3362 		return (ENOENT);
3363 	}
3364 }
3365 
3366 
3367 #include "opt_ddb.h"
3368 #ifdef DDB
3369 #include <ddb/ddb.h>
3370 #include <sys/cons.h>
3371 
3372 /*
3373  * List resources based on pci map registers, used for within ddb
3374  */
3375 
3376 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3377 {
3378 	struct pci_devinfo *dinfo;
3379 	struct devlist *devlist_head;
3380 	struct pci_conf *p;
3381 	const char *name;
3382 	int i, error, none_count;
3383 
3384 	none_count = 0;
3385 	/* get the head of the device queue */
3386 	devlist_head = &pci_devq;
3387 
3388 	/*
3389 	 * Go through the list of devices and print out devices
3390 	 */
3391 	for (error = 0, i = 0,
3392 	     dinfo = STAILQ_FIRST(devlist_head);
3393 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3394 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3395 
3396 		/* Populate pd_name and pd_unit */
3397 		name = NULL;
3398 		if (dinfo->cfg.dev)
3399 			name = device_get_name(dinfo->cfg.dev);
3400 
3401 		p = &dinfo->conf;
3402 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3403 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3404 			(name && *name) ? name : "none",
3405 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3406 			none_count++,
3407 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3408 			p->pc_sel.pc_func, (p->pc_class << 16) |
3409 			(p->pc_subclass << 8) | p->pc_progif,
3410 			(p->pc_subdevice << 16) | p->pc_subvendor,
3411 			(p->pc_device << 16) | p->pc_vendor,
3412 			p->pc_revid, p->pc_hdr);
3413 	}
3414 }
3415 #endif /* DDB */
3416 
3417 static struct resource *
3418 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3419     u_long start, u_long end, u_long count, u_int flags)
3420 {
3421 	struct pci_devinfo *dinfo = device_get_ivars(child);
3422 	struct resource_list *rl = &dinfo->resources;
3423 	struct resource_list_entry *rle;
3424 	struct resource *res;
3425 	pci_addr_t map, testval;
3426 	uint16_t cmd;
3427 	int maprange, mapsize;
3428 
3429 	/*
3430 	 * Weed out the bogons, and figure out how large the BAR/map
3431 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3432 	 * Note: atapci in legacy mode are special and handled elsewhere
3433 	 * in the code.  If you have a atapci device in legacy mode and
3434 	 * it fails here, that other code is broken.
3435 	 */
3436 	res = NULL;
3437 	map = pci_read_config(child, *rid, 4);
3438 	maprange = pci_maprange(map);
3439 	if (maprange == 64)
3440 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3441 
3442 	/*
3443 	 * Disable decoding via the command register before
3444 	 * determining the BAR's length since we will be placing it in
3445 	 * a weird state.
3446 	 */
3447 	cmd = pci_read_config(child, PCIR_COMMAND, 2);
3448 	pci_write_config(child, PCIR_COMMAND,
3449 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
3450 
3451 	/* Determine the BAR's length. */
3452 	pci_write_config(child, *rid, 0xffffffff, 4);
3453 	testval = pci_read_config(child, *rid, 4);
3454 	if (maprange == 64) {
3455 		pci_write_config(child, *rid + 4, 0xffffffff, 4);
3456 		testval |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) <<
3457 		    32;
3458 	}
3459 
3460 	/*
3461 	 * Restore the original value of the BAR.  We may have reprogrammed
3462 	 * the BAR of the low-level console device and when booting verbose,
3463 	 * we need the console device addressable.
3464 	 */
3465 	pci_write_config(child, *rid, map, 4);
3466 	if (maprange == 64)
3467 		pci_write_config(child, *rid + 4, map, 4);
3468 	pci_write_config(child, PCIR_COMMAND, cmd, 2);
3469 
3470 	/* Ignore a BAR with a base of 0. */
3471 	if (pci_mapbase(testval) == 0)
3472 		goto out;
3473 
3474 	if (PCI_BAR_MEM(testval)) {
3475 		if (type != SYS_RES_MEMORY) {
3476 			if (bootverbose)
3477 				device_printf(dev,
3478 				    "child %s requested type %d for rid %#x,"
3479 				    " but the BAR says it is an memio\n",
3480 				    device_get_nameunit(child), type, *rid);
3481 			goto out;
3482 		}
3483 	} else {
3484 		if (type != SYS_RES_IOPORT) {
3485 			if (bootverbose)
3486 				device_printf(dev,
3487 				    "child %s requested type %d for rid %#x,"
3488 				    " but the BAR says it is an ioport\n",
3489 				    device_get_nameunit(child), type, *rid);
3490 			goto out;
3491 		}
3492 	}
3493 	/*
3494 	 * For real BARs, we need to override the size that
3495 	 * the driver requests, because that's what the BAR
3496 	 * actually uses and we would otherwise have a
3497 	 * situation where we might allocate the excess to
3498 	 * another driver, which won't work.
3499 	 */
3500 	mapsize = pci_mapsize(testval);
3501 	count = 1UL << mapsize;
3502 	if (RF_ALIGNMENT(flags) < mapsize)
3503 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3504 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3505 		flags |= RF_PREFETCHABLE;
3506 
3507 	/*
3508 	 * Allocate enough resource, and then write back the
3509 	 * appropriate bar for that resource.
3510 	 */
3511 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3512 	    start, end, count, flags & ~RF_ACTIVE);
3513 	if (res == NULL) {
3514 		device_printf(child,
3515 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3516 		    count, *rid, type, start, end);
3517 		goto out;
3518 	}
3519 	rman_set_device(res, dev);
3520 	resource_list_add(rl, type, *rid, start, end, count);
3521 	rle = resource_list_find(rl, type, *rid);
3522 	if (rle == NULL)
3523 		panic("pci_alloc_map: unexpectedly can't find resource.");
3524 	rle->res = res;
3525 	rle->start = rman_get_start(res);
3526 	rle->end = rman_get_end(res);
3527 	rle->count = count;
3528 	if (bootverbose)
3529 		device_printf(child,
3530 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3531 		    count, *rid, type, rman_get_start(res));
3532 	map = rman_get_start(res);
3533 	pci_write_config(child, *rid, map, 4);
3534 	if (maprange == 64)
3535 		pci_write_config(child, *rid + 4, map >> 32, 4);
3536 out:;
3537 	return (res);
3538 }
3539 
3540 
3541 struct resource *
3542 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3543 		   u_long start, u_long end, u_long count, u_int flags)
3544 {
3545 	struct pci_devinfo *dinfo = device_get_ivars(child);
3546 	struct resource_list *rl = &dinfo->resources;
3547 	struct resource_list_entry *rle;
3548 	struct resource *res;
3549 	pcicfgregs *cfg = &dinfo->cfg;
3550 
3551 	if (device_get_parent(child) != dev)
3552 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3553 		    type, rid, start, end, count, flags));
3554 
3555 	/*
3556 	 * Perform lazy resource allocation
3557 	 */
3558 	switch (type) {
3559 	case SYS_RES_IRQ:
3560 		/*
3561 		 * Can't alloc legacy interrupt once MSI messages have
3562 		 * been allocated.
3563 		 */
3564 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3565 		    cfg->msix.msix_alloc > 0))
3566 			return (NULL);
3567 
3568 		/*
3569 		 * If the child device doesn't have an interrupt
3570 		 * routed and is deserving of an interrupt, try to
3571 		 * assign it one.
3572 		 */
3573 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3574 		    (cfg->intpin != 0))
3575 			pci_assign_interrupt(dev, child, 0);
3576 		break;
3577 	case SYS_RES_IOPORT:
3578 	case SYS_RES_MEMORY:
3579 		/* Allocate resources for this BAR if needed. */
3580 		rle = resource_list_find(rl, type, *rid);
3581 		if (rle == NULL) {
3582 			res = pci_alloc_map(dev, child, type, rid, start, end,
3583 			    count, flags);
3584 			if (res == NULL)
3585 				return (NULL);
3586 			rle = resource_list_find(rl, type, *rid);
3587 		}
3588 
3589 		/*
3590 		 * If the resource belongs to the bus, then give it to
3591 		 * the child.  We need to activate it if requested
3592 		 * since the bus always allocates inactive resources.
3593 		 */
3594 		if (rle != NULL && rle->res != NULL &&
3595 		    rman_get_device(rle->res) == dev) {
3596 			if (bootverbose)
3597 				device_printf(child,
3598 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3599 				    rman_get_size(rle->res), *rid, type,
3600 				    rman_get_start(rle->res));
3601 			rman_set_device(rle->res, child);
3602 			if ((flags & RF_ACTIVE) &&
3603 			    bus_activate_resource(child, type, *rid,
3604 			    rle->res) != 0)
3605 				return (NULL);
3606 			return (rle->res);
3607 		}
3608 	}
3609 	return (resource_list_alloc(rl, dev, child, type, rid,
3610 	    start, end, count, flags));
3611 }
3612 
3613 int
3614 pci_release_resource(device_t dev, device_t child, int type, int rid,
3615     struct resource *r)
3616 {
3617 	int error;
3618 
3619 	if (device_get_parent(child) != dev)
3620 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3621 		    type, rid, r));
3622 
3623 	/*
3624 	 * For BARs we don't actually want to release the resource.
3625 	 * Instead, we deactivate the resource if needed and then give
3626 	 * ownership of the BAR back to the bus.
3627 	 */
3628 	switch (type) {
3629 	case SYS_RES_IOPORT:
3630 	case SYS_RES_MEMORY:
3631 		if (rman_get_device(r) != child)
3632 			return (EINVAL);
3633 		if (rman_get_flags(r) & RF_ACTIVE) {
3634 			error = bus_deactivate_resource(child, type, rid, r);
3635 			if (error)
3636 				return (error);
3637 		}
3638 		rman_set_device(r, dev);
3639 		return (0);
3640 	}
3641 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3642 }
3643 
3644 int
3645 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3646     struct resource *r)
3647 {
3648 	int error;
3649 
3650 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3651 	if (error)
3652 		return (error);
3653 
3654 	/* Enable decoding in the command register when activating BARs. */
3655 	if (device_get_parent(child) == dev) {
3656 		switch (type) {
3657 		case SYS_RES_IOPORT:
3658 		case SYS_RES_MEMORY:
3659 			error = PCI_ENABLE_IO(dev, child, type);
3660 			break;
3661 		}
3662 	}
3663 	return (error);
3664 }
3665 
3666 void
3667 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3668 {
3669 	struct pci_devinfo *dinfo;
3670 	struct resource_list *rl;
3671 	struct resource_list_entry *rle;
3672 
3673 	if (device_get_parent(child) != dev)
3674 		return;
3675 
3676 	dinfo = device_get_ivars(child);
3677 	rl = &dinfo->resources;
3678 	rle = resource_list_find(rl, type, rid);
3679 	if (rle == NULL)
3680 		return;
3681 
3682 	if (rle->res) {
3683 		if (rman_get_device(rle->res) != dev ||
3684 		    rman_get_flags(rle->res) & RF_ACTIVE) {
3685 			device_printf(dev, "delete_resource: "
3686 			    "Resource still owned by child, oops. "
3687 			    "(type=%d, rid=%d, addr=%lx)\n",
3688 			    rle->type, rle->rid,
3689 			    rman_get_start(rle->res));
3690 			return;
3691 		}
3692 
3693 		/*
3694 		 * If this is a BAR, clear the BAR so it stops
3695 		 * decoding before releasing the resource.
3696 		 */
3697 		switch (type) {
3698 		case SYS_RES_IOPORT:
3699 		case SYS_RES_MEMORY:
3700 			/* XXX: 64-bit BARs? */
3701 			pci_write_config(child, rid, 0, 4);
3702 			break;
3703 		}
3704 		bus_release_resource(dev, type, rid, rle->res);
3705 	}
3706 	resource_list_delete(rl, type, rid);
3707 }
3708 
3709 struct resource_list *
3710 pci_get_resource_list (device_t dev, device_t child)
3711 {
3712 	struct pci_devinfo *dinfo = device_get_ivars(child);
3713 
3714 	return (&dinfo->resources);
3715 }
3716 
3717 uint32_t
3718 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3719 {
3720 	struct pci_devinfo *dinfo = device_get_ivars(child);
3721 	pcicfgregs *cfg = &dinfo->cfg;
3722 
3723 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3724 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3725 }
3726 
3727 void
3728 pci_write_config_method(device_t dev, device_t child, int reg,
3729     uint32_t val, int width)
3730 {
3731 	struct pci_devinfo *dinfo = device_get_ivars(child);
3732 	pcicfgregs *cfg = &dinfo->cfg;
3733 
3734 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3735 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3736 }
3737 
3738 int
3739 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3740     size_t buflen)
3741 {
3742 
3743 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3744 	    pci_get_function(child));
3745 	return (0);
3746 }
3747 
3748 int
3749 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3750     size_t buflen)
3751 {
3752 	struct pci_devinfo *dinfo;
3753 	pcicfgregs *cfg;
3754 
3755 	dinfo = device_get_ivars(child);
3756 	cfg = &dinfo->cfg;
3757 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3758 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3759 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3760 	    cfg->progif);
3761 	return (0);
3762 }
3763 
3764 int
3765 pci_assign_interrupt_method(device_t dev, device_t child)
3766 {
3767 	struct pci_devinfo *dinfo = device_get_ivars(child);
3768 	pcicfgregs *cfg = &dinfo->cfg;
3769 
3770 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3771 	    cfg->intpin));
3772 }
3773 
3774 static int
3775 pci_modevent(module_t mod, int what, void *arg)
3776 {
3777 	static struct cdev *pci_cdev;
3778 
3779 	switch (what) {
3780 	case MOD_LOAD:
3781 		STAILQ_INIT(&pci_devq);
3782 		pci_generation = 0;
3783 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3784 		    "pci");
3785 		pci_load_vendor_data();
3786 		break;
3787 
3788 	case MOD_UNLOAD:
3789 		destroy_dev(pci_cdev);
3790 		break;
3791 	}
3792 
3793 	return (0);
3794 }
3795 
3796 void
3797 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3798 {
3799 	int i;
3800 
3801 	/*
3802 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3803 	 * which we know need special treatment.  Type 2 devices are
3804 	 * cardbus bridges which also require special treatment.
3805 	 * Other types are unknown, and we err on the side of safety
3806 	 * by ignoring them.
3807 	 */
3808 	if (dinfo->cfg.hdrtype != 0)
3809 		return;
3810 
3811 	/*
3812 	 * Restore the device to full power mode.  We must do this
3813 	 * before we restore the registers because moving from D3 to
3814 	 * D0 will cause the chip's BARs and some other registers to
3815 	 * be reset to some unknown power on reset values.  Cut down
3816 	 * the noise on boot by doing nothing if we are already in
3817 	 * state D0.
3818 	 */
3819 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3820 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3821 	}
3822 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3823 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3824 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3825 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3826 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3827 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3828 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3829 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3830 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3831 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3832 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3833 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3834 
3835 	/* Restore MSI and MSI-X configurations if they are present. */
3836 	if (dinfo->cfg.msi.msi_location != 0)
3837 		pci_resume_msi(dev);
3838 	if (dinfo->cfg.msix.msix_location != 0)
3839 		pci_resume_msix(dev);
3840 }
3841 
3842 void
3843 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3844 {
3845 	int i;
3846 	uint32_t cls;
3847 	int ps;
3848 
3849 	/*
3850 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3851 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3852 	 * which also require special treatment.  Other types are unknown, and
3853 	 * we err on the side of safety by ignoring them.  Powering down
3854 	 * bridges should not be undertaken lightly.
3855 	 */
3856 	if (dinfo->cfg.hdrtype != 0)
3857 		return;
3858 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3859 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3860 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3861 
3862 	/*
3863 	 * Some drivers apparently write to these registers w/o updating our
3864 	 * cached copy.  No harm happens if we update the copy, so do so here
3865 	 * so we can restore them.  The COMMAND register is modified by the
3866 	 * bus w/o updating the cache.  This should represent the normally
3867 	 * writable portion of the 'defined' part of type 0 headers.  In
3868 	 * theory we also need to save/restore the PCI capability structures
3869 	 * we know about, but apart from power we don't know any that are
3870 	 * writable.
3871 	 */
3872 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3873 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3874 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3875 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3876 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3877 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3878 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3879 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3880 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3881 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3882 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3883 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3884 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3885 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3886 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3887 
3888 	/*
3889 	 * don't set the state for display devices, base peripherals and
3890 	 * memory devices since bad things happen when they are powered down.
3891 	 * We should (a) have drivers that can easily detach and (b) use
3892 	 * generic drivers for these devices so that some device actually
3893 	 * attaches.  We need to make sure that when we implement (a) we don't
3894 	 * power the device down on a reattach.
3895 	 */
3896 	cls = pci_get_class(dev);
3897 	if (!setstate)
3898 		return;
3899 	switch (pci_do_power_nodriver)
3900 	{
3901 		case 0:		/* NO powerdown at all */
3902 			return;
3903 		case 1:		/* Conservative about what to power down */
3904 			if (cls == PCIC_STORAGE)
3905 				return;
3906 			/*FALLTHROUGH*/
3907 		case 2:		/* Agressive about what to power down */
3908 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3909 			    cls == PCIC_BASEPERIPH)
3910 				return;
3911 			/*FALLTHROUGH*/
3912 		case 3:		/* Power down everything */
3913 			break;
3914 	}
3915 	/*
3916 	 * PCI spec says we can only go into D3 state from D0 state.
3917 	 * Transition from D[12] into D0 before going to D3 state.
3918 	 */
3919 	ps = pci_get_powerstate(dev);
3920 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3921 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3922 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3923 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3924 }
3925