xref: /freebsd/sys/dev/pci/pci.c (revision b1866dfea3f3f142697f8e325db49b8e668fba35)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
74 #define	PCI_DMA_BOUNDARY	0x100000000
75 #endif
76 
77 #define	PCIR_IS_BIOS(cfg, reg)						\
78 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
79 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
80 
81 static pci_addr_t	pci_mapbase(uint64_t mapreg);
82 static const char	*pci_maptype(uint64_t mapreg);
83 static int		pci_mapsize(uint64_t testval);
84 static int		pci_maprange(uint64_t mapreg);
85 static pci_addr_t	pci_rombase(uint64_t mapreg);
86 static int		pci_romsize(uint64_t testval);
87 static void		pci_fixancient(pcicfgregs *cfg);
88 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
89 
90 static int		pci_porten(device_t dev);
91 static int		pci_memen(device_t dev);
92 static void		pci_assign_interrupt(device_t bus, device_t dev,
93 			    int force_route);
94 static int		pci_add_map(device_t bus, device_t dev, int reg,
95 			    struct resource_list *rl, int force, int prefetch);
96 static int		pci_probe(device_t dev);
97 static int		pci_attach(device_t dev);
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static bus_dma_tag_t	pci_get_dma_tag(device_t bus, device_t dev);
103 static int		pci_modevent(module_t mod, int what, void *arg);
104 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105 			    pcicfgregs *cfg);
106 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108 			    int reg, uint32_t *data);
109 #if 0
110 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111 			    int reg, uint32_t data);
112 #endif
113 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114 static void		pci_disable_msi(device_t dev);
115 static void		pci_enable_msi(device_t dev, uint64_t address,
116 			    uint16_t data);
117 static void		pci_enable_msix(device_t dev, u_int index,
118 			    uint64_t address, uint32_t data);
119 static void		pci_mask_msix(device_t dev, u_int index);
120 static void		pci_unmask_msix(device_t dev, u_int index);
121 static int		pci_msi_blacklisted(void);
122 static void		pci_resume_msi(device_t dev);
123 static void		pci_resume_msix(device_t dev);
124 static int		pci_remap_intr_method(device_t bus, device_t dev,
125 			    u_int irq);
126 
127 static device_method_t pci_methods[] = {
128 	/* Device interface */
129 	DEVMETHOD(device_probe,		pci_probe),
130 	DEVMETHOD(device_attach,	pci_attach),
131 	DEVMETHOD(device_detach,	bus_generic_detach),
132 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
133 	DEVMETHOD(device_suspend,	pci_suspend),
134 	DEVMETHOD(device_resume,	pci_resume),
135 
136 	/* Bus interface */
137 	DEVMETHOD(bus_print_child,	pci_print_child),
138 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
139 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
140 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
141 	DEVMETHOD(bus_driver_added,	pci_driver_added),
142 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
143 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
144 
145 	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
146 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
147 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
148 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
149 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
150 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
151 	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
152 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
153 	DEVMETHOD(bus_activate_resource, pci_activate_resource),
154 	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
155 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
156 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
157 	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
158 
159 	/* PCI interface */
160 	DEVMETHOD(pci_read_config,	pci_read_config_method),
161 	DEVMETHOD(pci_write_config,	pci_write_config_method),
162 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
163 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
164 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
165 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
166 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
167 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
168 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
169 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
170 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
171 	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
172 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
173 	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
174 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
175 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
176 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
177 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
179 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
180 
181 	DEVMETHOD_END
182 };
183 
184 DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
185 
186 static devclass_t pci_devclass;
187 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
188 MODULE_VERSION(pci, 1);
189 
190 static char	*pci_vendordata;
191 static size_t	pci_vendordata_size;
192 
193 struct pci_quirk {
194 	uint32_t devid;	/* Vendor/device of the card */
195 	int	type;
196 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
197 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
198 #define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
199 	int	arg1;
200 	int	arg2;
201 };
202 
203 static const struct pci_quirk pci_quirks[] = {
204 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
205 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
206 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
207 	/* As does the ServerWorks OSB4 (the SMBus mapping register) */
208 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
209 
210 	/*
211 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
212 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
213 	 */
214 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
215 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
216 
217 	/*
218 	 * MSI doesn't work on earlier Intel chipsets including
219 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
220 	 */
221 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
222 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
223 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
224 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
225 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228 
229 	/*
230 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
231 	 * bridge.
232 	 */
233 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 
235 	/*
236 	 * MSI-X doesn't work with at least the LSI SAS1068E when passed
237 	 * through by VMware.
238 	 */
239 	{ 0x079015ad, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 
241 	/*
242 	 * Some virtualization environments emulate an older chipset
243 	 * but support MSI just fine.  QEMU uses the Intel 82440.
244 	 */
245 	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
246 
247 	{ 0 }
248 };
249 
250 /* map register information */
251 #define	PCI_MAPMEM	0x01	/* memory map */
252 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
253 #define	PCI_MAPPORT	0x04	/* port map */
254 
255 struct devlist pci_devq;
256 uint32_t pci_generation;
257 uint32_t pci_numdevs = 0;
258 static int pcie_chipset, pcix_chipset;
259 
260 /* sysctl vars */
261 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
262 
263 static int pci_enable_io_modes = 1;
264 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
265 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
266     &pci_enable_io_modes, 1,
267     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
268 enable these bits correctly.  We'd like to do this all the time, but there\n\
269 are some peripherals that this causes problems with.");
270 
271 static int pci_do_power_nodriver = 0;
272 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
273 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
274     &pci_do_power_nodriver, 0,
275   "Place a function into D3 state when no driver attaches to it.  0 means\n\
276 disable.  1 means conservatively place devices into D3 state.  2 means\n\
277 aggressively place devices into D3 state.  3 means put absolutely everything
278 in D3 state.");
279 
280 int pci_do_power_resume = 1;
281 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
282 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
283     &pci_do_power_resume, 1,
284   "Transition from D3 -> D0 on resume.");
285 
286 int pci_do_power_suspend = 1;
287 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
288 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
289     &pci_do_power_suspend, 1,
290   "Transition from D0 -> D3 on suspend.");
291 
292 static int pci_do_msi = 1;
293 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
294 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
295     "Enable support for MSI interrupts");
296 
297 static int pci_do_msix = 1;
298 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
299 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
300     "Enable support for MSI-X interrupts");
301 
302 static int pci_honor_msi_blacklist = 1;
303 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
304 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
305     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
306 
307 #if defined(__i386__) || defined(__amd64__)
308 static int pci_usb_takeover = 1;
309 #else
310 static int pci_usb_takeover = 0;
311 #endif
312 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
313 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
314     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
315 Disable this if you depend on BIOS emulation of USB devices, that is,\n\
316 you use USB devices (such as a keyboard or mouse) but do not load USB drivers");
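/*
 * Example (illustrative): the tunables above may be set from
 * loader.conf(5), e.g. to disable MSI and MSI-X system-wide while
 * debugging an interrupt problem:
 *
 *	hw.pci.enable_msi=0
 *	hw.pci.enable_msix=0
 */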
317 
318 /* Find a device_t by bus/slot/function in domain 0 */
319 
320 device_t
321 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
322 {
323 
324 	return (pci_find_dbsf(0, bus, slot, func));
325 }
326 
327 /* Find a device_t by domain/bus/slot/function */
328 
329 device_t
330 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
331 {
332 	struct pci_devinfo *dinfo;
333 
334 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
335 		if ((dinfo->cfg.domain == domain) &&
336 		    (dinfo->cfg.bus == bus) &&
337 		    (dinfo->cfg.slot == slot) &&
338 		    (dinfo->cfg.func == func)) {
339 			return (dinfo->cfg.dev);
340 		}
341 	}
342 
343 	return (NULL);
344 }
345 
346 /* Find a device_t by vendor/device ID */
347 
348 device_t
349 pci_find_device(uint16_t vendor, uint16_t device)
350 {
351 	struct pci_devinfo *dinfo;
352 
353 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
354 		if ((dinfo->cfg.vendor == vendor) &&
355 		    (dinfo->cfg.device == device)) {
356 			return (dinfo->cfg.dev);
357 		}
358 	}
359 
360 	return (NULL);
361 }
362 
363 device_t
364 pci_find_class(uint8_t class, uint8_t subclass)
365 {
366 	struct pci_devinfo *dinfo;
367 
368 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
369 		if (dinfo->cfg.baseclass == class &&
370 		    dinfo->cfg.subclass == subclass) {
371 			return (dinfo->cfg.dev);
372 		}
373 	}
374 
375 	return (NULL);
376 }
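/*
 * Example (sketch, not part of this driver): locating devices with the
 * helpers above.  The slot and ID values are hypothetical.
 */
#if 0
	device_t isab, nic;

	isab = pci_find_bsf(0, 31, 0);		/* domain 0, bus 0, slot 31 */
	nic = pci_find_device(0x8086, 0x100e);	/* vendor/device ID lookup */
	if (nic != NULL)
		device_printf(nic, "found NIC by vendor/device ID\n");
#endif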
377 
378 static int
379 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
380 {
381 	va_list ap;
382 	int retval;
383 
384 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
385 	    cfg->func);
386 	va_start(ap, fmt);
387 	retval += vprintf(fmt, ap);
388 	va_end(ap);
389 	return (retval);
390 }
391 
392 /* return base address of memory or port map */
393 
394 static pci_addr_t
395 pci_mapbase(uint64_t mapreg)
396 {
397 
398 	if (PCI_BAR_MEM(mapreg))
399 		return (mapreg & PCIM_BAR_MEM_BASE);
400 	else
401 		return (mapreg & PCIM_BAR_IO_BASE);
402 }
403 
404 /* return map type of memory or port map */
405 
406 static const char *
407 pci_maptype(uint64_t mapreg)
408 {
409 
410 	if (PCI_BAR_IO(mapreg))
411 		return ("I/O Port");
412 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
413 		return ("Prefetchable Memory");
414 	return ("Memory");
415 }
416 
417 /* return log2 of map size decoded for memory or port map */
418 
419 static int
420 pci_mapsize(uint64_t testval)
421 {
422 	int ln2size;
423 
424 	testval = pci_mapbase(testval);
425 	ln2size = 0;
426 	if (testval != 0) {
427 		while ((testval & 1) == 0)
428 		{
429 			ln2size++;
430 			testval >>= 1;
431 		}
432 	}
433 	return (ln2size);
434 }
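/*
 * Worked example (illustrative): after ~0 is written to a 32-bit memory
 * BAR, a device decoding 1MB reads back 0xfff00000.  pci_mapbase()
 * strips the low type bits, the lowest set bit is bit 20, and
 * pci_mapsize() therefore returns 20 (1 << 20 == 1MB).
 */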
435 
436 /* return base address of device ROM */
437 
438 static pci_addr_t
439 pci_rombase(uint64_t mapreg)
440 {
441 
442 	return (mapreg & PCIM_BIOS_ADDR_MASK);
443 }
444 
445 /* return log2 of map size decoded for device ROM */
446 
447 static int
448 pci_romsize(uint64_t testval)
449 {
450 	int ln2size;
451 
452 	testval = pci_rombase(testval);
453 	ln2size = 0;
454 	if (testval != 0) {
455 		while ((testval & 1) == 0)
456 		{
457 			ln2size++;
458 			testval >>= 1;
459 		}
460 	}
461 	return (ln2size);
462 }
463 
464 /* return log2 of address range supported by map register */
465 
466 static int
467 pci_maprange(uint64_t mapreg)
468 {
469 	int ln2range = 0;
470 
471 	if (PCI_BAR_IO(mapreg))
472 		ln2range = 32;
473 	else
474 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
475 		case PCIM_BAR_MEM_32:
476 			ln2range = 32;
477 			break;
478 		case PCIM_BAR_MEM_1MB:
479 			ln2range = 20;
480 			break;
481 		case PCIM_BAR_MEM_64:
482 			ln2range = 64;
483 			break;
484 		}
485 	return (ln2range);
486 }
487 
488 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
489 
490 static void
491 pci_fixancient(pcicfgregs *cfg)
492 {
493 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
494 		return;
495 
496 	/* PCI to PCI bridges use header type 1 */
497 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
498 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
499 }
500 
501 /* extract header type specific config data */
502 
503 static void
504 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
505 {
506 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
507 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
508 	case PCIM_HDRTYPE_NORMAL:
509 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
510 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
511 		cfg->nummaps	    = PCI_MAXMAPS_0;
512 		break;
513 	case PCIM_HDRTYPE_BRIDGE:
514 		cfg->nummaps	    = PCI_MAXMAPS_1;
515 		break;
516 	case PCIM_HDRTYPE_CARDBUS:
517 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
518 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
519 		cfg->nummaps	    = PCI_MAXMAPS_2;
520 		break;
521 	}
522 #undef REG
523 }
524 
525 /* read configuration header into pcicfgregs structure */
526 struct pci_devinfo *
527 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
528 {
529 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
530 	pcicfgregs *cfg = NULL;
531 	struct pci_devinfo *devlist_entry;
532 	struct devlist *devlist_head;
533 
534 	devlist_head = &pci_devq;
535 
536 	devlist_entry = NULL;
537 
538 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
539 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
540 		if (devlist_entry == NULL)
541 			return (NULL);
542 
543 		cfg = &devlist_entry->cfg;
544 
545 		cfg->domain		= d;
546 		cfg->bus		= b;
547 		cfg->slot		= s;
548 		cfg->func		= f;
549 		cfg->vendor		= REG(PCIR_VENDOR, 2);
550 		cfg->device		= REG(PCIR_DEVICE, 2);
551 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
552 		cfg->statreg		= REG(PCIR_STATUS, 2);
553 		cfg->baseclass		= REG(PCIR_CLASS, 1);
554 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
555 		cfg->progif		= REG(PCIR_PROGIF, 1);
556 		cfg->revid		= REG(PCIR_REVID, 1);
557 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
558 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
559 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
560 		cfg->intpin		= REG(PCIR_INTPIN, 1);
561 		cfg->intline		= REG(PCIR_INTLINE, 1);
562 
563 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
564 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
565 
566 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
567 		cfg->hdrtype		&= ~PCIM_MFDEV;
568 		STAILQ_INIT(&cfg->maps);
569 
570 		pci_fixancient(cfg);
571 		pci_hdrtypedata(pcib, b, s, f, cfg);
572 
573 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
574 			pci_read_cap(pcib, cfg);
575 
576 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
577 
578 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
579 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
580 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
581 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
582 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
583 
584 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
585 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
586 		devlist_entry->conf.pc_vendor = cfg->vendor;
587 		devlist_entry->conf.pc_device = cfg->device;
588 
589 		devlist_entry->conf.pc_class = cfg->baseclass;
590 		devlist_entry->conf.pc_subclass = cfg->subclass;
591 		devlist_entry->conf.pc_progif = cfg->progif;
592 		devlist_entry->conf.pc_revid = cfg->revid;
593 
594 		pci_numdevs++;
595 		pci_generation++;
596 	}
597 	return (devlist_entry);
598 #undef REG
599 }
600 
601 static void
602 pci_read_cap(device_t pcib, pcicfgregs *cfg)
603 {
604 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
605 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
606 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
607 	uint64_t addr;
608 #endif
609 	uint32_t val;
610 	int	ptr, nextptr, ptrptr;
611 
612 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
613 	case PCIM_HDRTYPE_NORMAL:
614 	case PCIM_HDRTYPE_BRIDGE:
615 		ptrptr = PCIR_CAP_PTR;
616 		break;
617 	case PCIM_HDRTYPE_CARDBUS:
618 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
619 		break;
620 	default:
621 		return;		/* no extended capabilities support */
622 	}
623 	nextptr = REG(ptrptr, 1);	/* sanity check? */
624 
625 	/*
626 	 * Read capability entries.
627 	 */
628 	while (nextptr != 0) {
629 		/* Sanity check */
630 		if (nextptr > 255) {
631 			printf("illegal PCI extended capability offset %d\n",
632 			    nextptr);
633 			return;
634 		}
635 		/* Find the next entry */
636 		ptr = nextptr;
637 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
638 
639 		/* Process this entry */
640 		switch (REG(ptr + PCICAP_ID, 1)) {
641 		case PCIY_PMG:		/* PCI power management */
642 			if (cfg->pp.pp_cap == 0) {
643 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
644 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
645 				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
646 				if ((nextptr - ptr) > PCIR_POWER_DATA)
647 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
648 			}
649 			break;
650 		case PCIY_HT:		/* HyperTransport */
651 			/* Determine HT-specific capability type. */
652 			val = REG(ptr + PCIR_HT_COMMAND, 2);
653 
654 			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
655 				cfg->ht.ht_slave = ptr;
656 
657 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
658 			switch (val & PCIM_HTCMD_CAP_MASK) {
659 			case PCIM_HTCAP_MSI_MAPPING:
660 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
661 					/* Sanity check the mapping window. */
662 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
663 					    4);
664 					addr <<= 32;
665 					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
666 					    4);
667 					if (addr != MSI_INTEL_ADDR_BASE)
668 						device_printf(pcib,
669 	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
670 						    cfg->domain, cfg->bus,
671 						    cfg->slot, cfg->func,
672 						    (long long)addr);
673 				} else
674 					addr = MSI_INTEL_ADDR_BASE;
675 
676 				cfg->ht.ht_msimap = ptr;
677 				cfg->ht.ht_msictrl = val;
678 				cfg->ht.ht_msiaddr = addr;
679 				break;
680 			}
681 #endif
682 			break;
683 		case PCIY_MSI:		/* PCI MSI */
684 			cfg->msi.msi_location = ptr;
685 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
686 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
687 						     PCIM_MSICTRL_MMC_MASK)>>1);
688 			break;
689 		case PCIY_MSIX:		/* PCI MSI-X */
690 			cfg->msix.msix_location = ptr;
691 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
692 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
693 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
694 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
695 			cfg->msix.msix_table_bar = PCIR_BAR(val &
696 			    PCIM_MSIX_BIR_MASK);
697 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
698 			val = REG(ptr + PCIR_MSIX_PBA, 4);
699 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
700 			    PCIM_MSIX_BIR_MASK);
701 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
702 			break;
703 		case PCIY_VPD:		/* PCI Vital Product Data */
704 			cfg->vpd.vpd_reg = ptr;
705 			break;
706 		case PCIY_SUBVENDOR:
707 			/* Should always be true. */
708 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
709 			    PCIM_HDRTYPE_BRIDGE) {
710 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
711 				cfg->subvendor = val & 0xffff;
712 				cfg->subdevice = val >> 16;
713 			}
714 			break;
715 		case PCIY_PCIX:		/* PCI-X */
716 			/*
717 			 * Assume we have a PCI-X chipset if we have
718 			 * at least one PCI-PCI bridge with a PCI-X
719 			 * capability.  Note that some systems with
720 			 * PCI-express or HT chipsets might match on
721 			 * this check as well.
722 			 */
723 			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
724 			    PCIM_HDRTYPE_BRIDGE)
725 				pcix_chipset = 1;
726 			cfg->pcix.pcix_location = ptr;
727 			break;
728 		case PCIY_EXPRESS:	/* PCI-express */
729 			/*
730 			 * Assume we have a PCI-express chipset if we have
731 			 * at least one PCI-express device.
732 			 */
733 			pcie_chipset = 1;
734 			cfg->pcie.pcie_location = ptr;
735 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
736 			cfg->pcie.pcie_type = val & PCIM_EXP_FLAGS_TYPE;
737 			break;
738 		default:
739 			break;
740 		}
741 	}
742 
743 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
744 	/*
745 	 * Enable the MSI mapping window for all HyperTransport
746 	 * slaves.  PCI-PCI bridges have their windows enabled via
747 	 * PCIB_MAP_MSI().
748 	 */
749 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
750 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
751 		device_printf(pcib,
752 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
753 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
754 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
755 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
756 		     2);
757 	}
758 #endif
759 /* The REG and WREG macros carry through for use by the next functions. */
760 }
761 
762 /*
763  * PCI Vital Product Data
764  */
765 
766 #define	PCI_VPD_TIMEOUT		1000000
767 
768 static int
769 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
770 {
771 	int count = PCI_VPD_TIMEOUT;
772 
773 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
774 
775 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
776 
777 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
778 		if (--count < 0)
779 			return (ENXIO);
780 		DELAY(1);	/* limit looping */
781 	}
782 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
783 
784 	return (0);
785 }
786 
787 #if 0
788 static int
789 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
790 {
791 	int count = PCI_VPD_TIMEOUT;
792 
793 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
794 
795 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
796 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
797 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
798 		if (--count < 0)
799 			return (ENXIO);
800 		DELAY(1);	/* limit looping */
801 	}
802 
803 	return (0);
804 }
805 #endif
806 
807 #undef PCI_VPD_TIMEOUT
808 
809 struct vpd_readstate {
810 	device_t	pcib;
811 	pcicfgregs	*cfg;
812 	uint32_t	val;
813 	int		bytesinval;
814 	int		off;
815 	uint8_t		cksum;
816 };
817 
818 static int
819 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
820 {
821 	uint32_t reg;
822 	uint8_t byte;
823 
824 	if (vrs->bytesinval == 0) {
825 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
826 			return (ENXIO);
827 		vrs->val = le32toh(reg);
828 		vrs->off += 4;
829 		byte = vrs->val & 0xff;
830 		vrs->bytesinval = 3;
831 	} else {
832 		vrs->val = vrs->val >> 8;
833 		byte = vrs->val & 0xff;
834 		vrs->bytesinval--;
835 	}
836 
837 	vrs->cksum += byte;
838 	*data = byte;
839 	return (0);
840 }
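/*
 * Worked example (illustrative): if pci_read_vpd_reg() returns the
 * 32-bit value 0x44434241, successive vpd_nextbyte() calls yield 0x41,
 * 0x42, 0x43, 0x44 -- the VPD stream is consumed least-significant
 * byte first, matching the le32toh() conversion above.
 */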
841 
842 static void
843 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
844 {
845 	struct vpd_readstate vrs;
846 	int state;
847 	int name;
848 	int remain;
849 	int i;
850 	int alloc, off;		/* alloc/off for RO/W arrays */
851 	int cksumvalid;
852 	int dflen;
853 	uint8_t byte;
854 	uint8_t byte2;
855 
856 	/* init vpd reader */
857 	vrs.bytesinval = 0;
858 	vrs.off = 0;
859 	vrs.pcib = pcib;
860 	vrs.cfg = cfg;
861 	vrs.cksum = 0;
862 
863 	state = 0;
864 	name = remain = i = 0;	/* shut up stupid gcc */
865 	alloc = off = 0;	/* shut up stupid gcc */
866 	dflen = 0;		/* shut up stupid gcc */
867 	cksumvalid = -1;
868 	while (state >= 0) {
869 		if (vpd_nextbyte(&vrs, &byte)) {
870 			state = -2;
871 			break;
872 		}
873 #if 0
874 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
875 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
876 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
877 #endif
878 		switch (state) {
879 		case 0:		/* item name */
880 			if (byte & 0x80) {
881 				if (vpd_nextbyte(&vrs, &byte2)) {
882 					state = -2;
883 					break;
884 				}
885 				remain = byte2;
886 				if (vpd_nextbyte(&vrs, &byte2)) {
887 					state = -2;
888 					break;
889 				}
890 				remain |= byte2 << 8;
891 				if (remain > (0x7f*4 - vrs.off)) {
892 					state = -1;
893 					pci_printf(cfg,
894 					    "invalid VPD data, remain %#x\n",
895 					    remain);
896 				}
897 				name = byte & 0x7f;
898 			} else {
899 				remain = byte & 0x7;
900 				name = (byte >> 3) & 0xf;
901 			}
902 			switch (name) {
903 			case 0x2:	/* String */
904 				cfg->vpd.vpd_ident = malloc(remain + 1,
905 				    M_DEVBUF, M_WAITOK);
906 				i = 0;
907 				state = 1;
908 				break;
909 			case 0xf:	/* End */
910 				state = -1;
911 				break;
912 			case 0x10:	/* VPD-R */
913 				alloc = 8;
914 				off = 0;
915 				cfg->vpd.vpd_ros = malloc(alloc *
916 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
917 				    M_WAITOK | M_ZERO);
918 				state = 2;
919 				break;
920 			case 0x11:	/* VPD-W */
921 				alloc = 8;
922 				off = 0;
923 				cfg->vpd.vpd_w = malloc(alloc *
924 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
925 				    M_WAITOK | M_ZERO);
926 				state = 5;
927 				break;
928 			default:	/* Invalid data, abort */
929 				state = -1;
930 				break;
931 			}
932 			break;
933 
934 		case 1:	/* Identifier String */
935 			cfg->vpd.vpd_ident[i++] = byte;
936 			remain--;
937 			if (remain == 0)  {
938 				cfg->vpd.vpd_ident[i] = '\0';
939 				state = 0;
940 			}
941 			break;
942 
943 		case 2:	/* VPD-R Keyword Header */
944 			if (off == alloc) {
945 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
946 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
947 				    M_DEVBUF, M_WAITOK | M_ZERO);
948 			}
949 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
950 			if (vpd_nextbyte(&vrs, &byte2)) {
951 				state = -2;
952 				break;
953 			}
954 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
955 			if (vpd_nextbyte(&vrs, &byte2)) {
956 				state = -2;
957 				break;
958 			}
959 			dflen = byte2;
960 			if (dflen == 0 &&
961 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
962 			    2) == 0) {
963 				/*
964 				 * if this happens, we can't trust the rest
965 				 * of the VPD.
966 				 */
967 				pci_printf(cfg, "bad keyword length: %d\n",
968 				    dflen);
969 				cksumvalid = 0;
970 				state = -1;
971 				break;
972 			} else if (dflen == 0) {
973 				cfg->vpd.vpd_ros[off].value = malloc(1 *
974 				    sizeof(*cfg->vpd.vpd_ros[off].value),
975 				    M_DEVBUF, M_WAITOK);
976 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
977 			} else
978 				cfg->vpd.vpd_ros[off].value = malloc(
979 				    (dflen + 1) *
980 				    sizeof(*cfg->vpd.vpd_ros[off].value),
981 				    M_DEVBUF, M_WAITOK);
982 			remain -= 3;
983 			i = 0;
984 			/* keep in sync w/ state 3's transitions */
985 			if (dflen == 0 && remain == 0)
986 				state = 0;
987 			else if (dflen == 0)
988 				state = 2;
989 			else
990 				state = 3;
991 			break;
992 
993 		case 3:	/* VPD-R Keyword Value */
994 			cfg->vpd.vpd_ros[off].value[i++] = byte;
995 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
996 			    "RV", 2) == 0 && cksumvalid == -1) {
997 				if (vrs.cksum == 0)
998 					cksumvalid = 1;
999 				else {
1000 					if (bootverbose)
1001 						pci_printf(cfg,
1002 					    "bad VPD cksum, remain %hhu\n",
1003 						    vrs.cksum);
1004 					cksumvalid = 0;
1005 					state = -1;
1006 					break;
1007 				}
1008 			}
1009 			dflen--;
1010 			remain--;
1011 			/* keep in sync w/ state 2's transitions */
1012 			if (dflen == 0)
1013 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1014 			if (dflen == 0 && remain == 0) {
1015 				cfg->vpd.vpd_rocnt = off;
1016 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1017 				    off * sizeof(*cfg->vpd.vpd_ros),
1018 				    M_DEVBUF, M_WAITOK | M_ZERO);
1019 				state = 0;
1020 			} else if (dflen == 0)
1021 				state = 2;
1022 			break;
1023 
1024 		case 4:
1025 			remain--;
1026 			if (remain == 0)
1027 				state = 0;
1028 			break;
1029 
1030 		case 5:	/* VPD-W Keyword Header */
1031 			if (off == alloc) {
1032 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1033 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1034 				    M_DEVBUF, M_WAITOK | M_ZERO);
1035 			}
1036 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1037 			if (vpd_nextbyte(&vrs, &byte2)) {
1038 				state = -2;
1039 				break;
1040 			}
1041 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1042 			if (vpd_nextbyte(&vrs, &byte2)) {
1043 				state = -2;
1044 				break;
1045 			}
1046 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1047 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1048 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1049 			    sizeof(*cfg->vpd.vpd_w[off].value),
1050 			    M_DEVBUF, M_WAITOK);
1051 			remain -= 3;
1052 			i = 0;
1053 		/* keep in sync w/ state 6's transitions */
1054 			if (dflen == 0 && remain == 0)
1055 				state = 0;
1056 			else if (dflen == 0)
1057 				state = 5;
1058 			else
1059 				state = 6;
1060 			break;
1061 
1062 		case 6:	/* VPD-W Keyword Value */
1063 			cfg->vpd.vpd_w[off].value[i++] = byte;
1064 			dflen--;
1065 			remain--;
1066 			/* keep in sync w/ state 5's transitions */
1067 			if (dflen == 0)
1068 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1069 			if (dflen == 0 && remain == 0) {
1070 				cfg->vpd.vpd_wcnt = off;
1071 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1072 				    off * sizeof(*cfg->vpd.vpd_w),
1073 				    M_DEVBUF, M_WAITOK | M_ZERO);
1074 				state = 0;
1075 			} else if (dflen == 0)
1076 				state = 5;
1077 			break;
1078 
1079 		default:
1080 			pci_printf(cfg, "invalid state: %d\n", state);
1081 			state = -1;
1082 			break;
1083 		}
1084 	}
1085 
1086 	if (cksumvalid == 0 || state < -1) {
1087 		/* read-only data bad, clean up */
1088 		if (cfg->vpd.vpd_ros != NULL) {
1089 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1090 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1091 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1092 			cfg->vpd.vpd_ros = NULL;
1093 		}
1094 	}
1095 	if (state < -1) {
1096 		/* I/O error, clean up */
1097 		pci_printf(cfg, "failed to read VPD data.\n");
1098 		if (cfg->vpd.vpd_ident != NULL) {
1099 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1100 			cfg->vpd.vpd_ident = NULL;
1101 		}
1102 		if (cfg->vpd.vpd_w != NULL) {
1103 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1104 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1105 			free(cfg->vpd.vpd_w, M_DEVBUF);
1106 			cfg->vpd.vpd_w = NULL;
1107 		}
1108 	}
1109 	cfg->vpd.vpd_cached = 1;
1110 #undef REG
1111 #undef WREG
1112 }
1113 
1114 int
1115 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1116 {
1117 	struct pci_devinfo *dinfo = device_get_ivars(child);
1118 	pcicfgregs *cfg = &dinfo->cfg;
1119 
1120 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1121 		pci_read_vpd(device_get_parent(dev), cfg);
1122 
1123 	*identptr = cfg->vpd.vpd_ident;
1124 
1125 	if (*identptr == NULL)
1126 		return (ENXIO);
1127 
1128 	return (0);
1129 }
1130 
1131 int
1132 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1133 	const char **vptr)
1134 {
1135 	struct pci_devinfo *dinfo = device_get_ivars(child);
1136 	pcicfgregs *cfg = &dinfo->cfg;
1137 	int i;
1138 
1139 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1140 		pci_read_vpd(device_get_parent(dev), cfg);
1141 
1142 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1143 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1144 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1145 			*vptr = cfg->vpd.vpd_ros[i].value;
1146 			return (0);
1147 		}
1148 
1149 	*vptr = NULL;
1150 	return (ENXIO);
1151 }
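/*
 * Example (sketch): a driver fetching its serial number from the VPD
 * read-only section through the pci_get_vpd_readonly() wrapper.  The
 * "SN" keyword is standard, but not every device provides it.
 */
#if 0
	const char *sn;

	if (pci_get_vpd_readonly(dev, "SN", &sn) == 0)
		device_printf(dev, "serial number: %s\n", sn);
#endif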
1152 
1153 /*
1154  * Find the requested HyperTransport capability and return the offset
1155  * in configuration space via the pointer provided.  The function
1156  * returns 0 on success and an error code otherwise.
1157  */
1158 int
1159 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1160 {
1161 	int ptr, error;
1162 	uint16_t val;
1163 
1164 	error = pci_find_cap(child, PCIY_HT, &ptr);
1165 	if (error)
1166 		return (error);
1167 
1168 	/*
1169 	 * Traverse the capabilities list checking each HT capability
1170 	 * to see if it matches the requested HT capability.
1171 	 */
1172 	while (ptr != 0) {
1173 		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1174 		if (capability == PCIM_HTCAP_SLAVE ||
1175 		    capability == PCIM_HTCAP_HOST)
1176 			val &= 0xe000;
1177 		else
1178 			val &= PCIM_HTCMD_CAP_MASK;
1179 		if (val == capability) {
1180 			if (capreg != NULL)
1181 				*capreg = ptr;
1182 			return (0);
1183 		}
1184 
1185 		/* Skip to the next HT capability. */
1186 		while (ptr != 0) {
1187 			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1188 			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1189 			    PCIY_HT)
1190 				break;
1191 		}
1192 	}
1193 	return (ENOENT);
1194 }
1195 
1196 /*
1197  * Find the requested capability and return the offset in
1198  * configuration space via the pointer provided.  The function returns
1199  * 0 on success and an error code otherwise.
1200  */
1201 int
1202 pci_find_cap_method(device_t dev, device_t child, int capability,
1203     int *capreg)
1204 {
1205 	struct pci_devinfo *dinfo = device_get_ivars(child);
1206 	pcicfgregs *cfg = &dinfo->cfg;
1207 	u_int32_t status;
1208 	u_int8_t ptr;
1209 
1210 	/*
1211 	 * Check the CAP_LIST bit of the PCI status register first.
1212 	 */
1213 	status = pci_read_config(child, PCIR_STATUS, 2);
1214 	if (!(status & PCIM_STATUS_CAPPRESENT))
1215 		return (ENXIO);
1216 
1217 	/*
1218 	 * Determine the start pointer of the capabilities list.
1219 	 */
1220 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1221 	case PCIM_HDRTYPE_NORMAL:
1222 	case PCIM_HDRTYPE_BRIDGE:
1223 		ptr = PCIR_CAP_PTR;
1224 		break;
1225 	case PCIM_HDRTYPE_CARDBUS:
1226 		ptr = PCIR_CAP_PTR_2;
1227 		break;
1228 	default:
1229 		/* XXX: panic? */
1230 		return (ENXIO);		/* no extended capabilities support */
1231 	}
1232 	ptr = pci_read_config(child, ptr, 1);
1233 
1234 	/*
1235 	 * Traverse the capabilities list.
1236 	 */
1237 	while (ptr != 0) {
1238 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1239 			if (capreg != NULL)
1240 				*capreg = ptr;
1241 			return (0);
1242 		}
1243 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1244 	}
1245 
1246 	return (ENOENT);
1247 }
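/*
 * Example (sketch): drivers normally go through the pci_find_cap()
 * wrapper, e.g. to locate the PCI-express capability before reading
 * PCIe-only registers.
 */
#if 0
	int cap;
	uint16_t ctl;

	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) == 0)
		ctl = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
#endif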
1248 
1249 /*
1250  * Find the requested extended capability and return the offset in
1251  * configuration space via the pointer provided.  The function returns
1252  * 0 on success and an error code otherwise.
1253  */
1254 int
1255 pci_find_extcap_method(device_t dev, device_t child, int capability,
1256     int *capreg)
1257 {
1258 	struct pci_devinfo *dinfo = device_get_ivars(child);
1259 	pcicfgregs *cfg = &dinfo->cfg;
1260 	uint32_t ecap;
1261 	uint16_t ptr;
1262 
1263 	/* Only supported for PCI-express devices. */
1264 	if (cfg->pcie.pcie_location == 0)
1265 		return (ENXIO);
1266 
1267 	ptr = PCIR_EXTCAP;
1268 	ecap = pci_read_config(child, ptr, 4);
1269 	if (ecap == 0xffffffff || ecap == 0)
1270 		return (ENOENT);
1271 	for (;;) {
1272 		if (PCI_EXTCAP_ID(ecap) == capability) {
1273 			if (capreg != NULL)
1274 				*capreg = ptr;
1275 			return (0);
1276 		}
1277 		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1278 		if (ptr == 0)
1279 			break;
1280 		ecap = pci_read_config(child, ptr, 4);
1281 	}
1282 
1283 	return (ENOENT);
1284 }
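/*
 * Example (sketch): locating the Advanced Error Reporting extended
 * capability on a PCI-express device via the pci_find_extcap() wrapper.
 */
#if 0
	int ecap;

	if (pci_find_extcap(dev, PCIZ_AER, &ecap) == 0)
		device_printf(dev, "AER registers at config offset %#x\n",
		    ecap);
#endif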
1285 
1286 /*
1287  * Support for MSI-X message interrupts.
1288  */
1289 void
1290 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1291 {
1292 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1293 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1294 	uint32_t offset;
1295 
1296 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1297 	offset = msix->msix_table_offset + index * 16;
1298 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1299 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1300 	bus_write_4(msix->msix_table_res, offset + 8, data);
1301 
1302 	/* Enable MSI -> HT mapping. */
1303 	pci_ht_map_msi(dev, address);
1304 }
1305 
1306 void
1307 pci_mask_msix(device_t dev, u_int index)
1308 {
1309 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1310 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1311 	uint32_t offset, val;
1312 
1313 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1314 	offset = msix->msix_table_offset + index * 16 + 12;
1315 	val = bus_read_4(msix->msix_table_res, offset);
1316 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1317 		val |= PCIM_MSIX_VCTRL_MASK;
1318 		bus_write_4(msix->msix_table_res, offset, val);
1319 	}
1320 }
1321 
1322 void
1323 pci_unmask_msix(device_t dev, u_int index)
1324 {
1325 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1326 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1327 	uint32_t offset, val;
1328 
1329 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1330 	offset = msix->msix_table_offset + index * 16 + 12;
1331 	val = bus_read_4(msix->msix_table_res, offset);
1332 	if (val & PCIM_MSIX_VCTRL_MASK) {
1333 		val &= ~PCIM_MSIX_VCTRL_MASK;
1334 		bus_write_4(msix->msix_table_res, offset, val);
1335 	}
1336 }
1337 
1338 int
1339 pci_pending_msix(device_t dev, u_int index)
1340 {
1341 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1342 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1343 	uint32_t offset, bit;
1344 
1345 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1346 	offset = msix->msix_pba_offset + (index / 32) * 4;
1347 	bit = 1 << index % 32;
1348 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1349 }
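/*
 * Worked example (illustrative): each MSI-X table entry is 16 bytes
 * (address low, address high, data, vector control), so message 3 lives
 * at msix_table_offset + 48.  In the PBA, message 40 maps to dword
 * 40 / 32 == 1 and bit 40 % 32 == 8.
 */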
1350 
1351 /*
1352  * Restore MSI-X registers and table during resume.  If MSI-X is
1353  * enabled then walk the virtual table to restore the actual MSI-X
1354  * table.
1355  */
1356 static void
1357 pci_resume_msix(device_t dev)
1358 {
1359 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1360 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1361 	struct msix_table_entry *mte;
1362 	struct msix_vector *mv;
1363 	int i;
1364 
1365 	if (msix->msix_alloc > 0) {
1366 		/* First, mask all vectors. */
1367 		for (i = 0; i < msix->msix_msgnum; i++)
1368 			pci_mask_msix(dev, i);
1369 
1370 		/* Second, program any messages with at least one handler. */
1371 		for (i = 0; i < msix->msix_table_len; i++) {
1372 			mte = &msix->msix_table[i];
1373 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1374 				continue;
1375 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1376 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1377 			pci_unmask_msix(dev, i);
1378 		}
1379 	}
1380 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1381 	    msix->msix_ctrl, 2);
1382 }
1383 
1384 /*
1385  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1386  * returned in *count.  After this function returns, each message will be
1387  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1388  */
1389 int
1390 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1391 {
1392 	struct pci_devinfo *dinfo = device_get_ivars(child);
1393 	pcicfgregs *cfg = &dinfo->cfg;
1394 	struct resource_list_entry *rle;
1395 	int actual, error, i, irq, max;
1396 
1397 	/* Don't let count == 0 get us into trouble. */
1398 	if (*count == 0)
1399 		return (EINVAL);
1400 
1401 	/* If rid 0 is allocated, then fail. */
1402 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1403 	if (rle != NULL && rle->res != NULL)
1404 		return (ENXIO);
1405 
1406 	/* Already have allocated messages? */
1407 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1408 		return (ENXIO);
1409 
1410 	/* If MSI is blacklisted for this system, fail. */
1411 	if (pci_msi_blacklisted())
1412 		return (ENXIO);
1413 
1414 	/* MSI-X capability present? */
1415 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1416 		return (ENODEV);
1417 
1418 	/* Make sure the appropriate BARs are mapped. */
1419 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1420 	    cfg->msix.msix_table_bar);
1421 	if (rle == NULL || rle->res == NULL ||
1422 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1423 		return (ENXIO);
1424 	cfg->msix.msix_table_res = rle->res;
1425 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1426 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1427 		    cfg->msix.msix_pba_bar);
1428 		if (rle == NULL || rle->res == NULL ||
1429 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1430 			return (ENXIO);
1431 	}
1432 	cfg->msix.msix_pba_res = rle->res;
1433 
1434 	if (bootverbose)
1435 		device_printf(child,
1436 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1437 		    *count, cfg->msix.msix_msgnum);
1438 	max = min(*count, cfg->msix.msix_msgnum);
1439 	for (i = 0; i < max; i++) {
1440 		/* Allocate a message. */
1441 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1442 		if (error) {
1443 			if (i == 0)
1444 				return (error);
1445 			break;
1446 		}
1447 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1448 		    irq, 1);
1449 	}
1450 	actual = i;
1451 
1452 	if (bootverbose) {
1453 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1454 		if (actual == 1)
1455 			device_printf(child, "using IRQ %lu for MSI-X\n",
1456 			    rle->start);
1457 		else {
1458 			int run;
1459 
1460 			/*
1461 			 * Be fancy and try to print contiguous runs of
1462 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1463 			 * 'run' is true if we are in a range.
1464 			 */
1465 			device_printf(child, "using IRQs %lu", rle->start);
1466 			irq = rle->start;
1467 			run = 0;
1468 			for (i = 1; i < actual; i++) {
1469 				rle = resource_list_find(&dinfo->resources,
1470 				    SYS_RES_IRQ, i + 1);
1471 
1472 				/* Still in a run? */
1473 				if (rle->start == irq + 1) {
1474 					run = 1;
1475 					irq++;
1476 					continue;
1477 				}
1478 
1479 				/* Finish previous range. */
1480 				if (run) {
1481 					printf("-%d", irq);
1482 					run = 0;
1483 				}
1484 
1485 				/* Start new range. */
1486 				printf(",%lu", rle->start);
1487 				irq = rle->start;
1488 			}
1489 
1490 			/* Unfinished range? */
1491 			if (run)
1492 				printf("-%d", irq);
1493 			printf(" for MSI-X\n");
1494 		}
1495 	}
1496 
1497 	/* Mask all vectors. */
1498 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1499 		pci_mask_msix(child, i);
1500 
1501 	/* Allocate and initialize vector data and virtual table. */
1502 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1503 	    M_DEVBUF, M_WAITOK | M_ZERO);
1504 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1505 	    M_DEVBUF, M_WAITOK | M_ZERO);
1506 	for (i = 0; i < actual; i++) {
1507 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1508 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1509 		cfg->msix.msix_table[i].mte_vector = i + 1;
1510 	}
1511 
1512 	/* Update control register to enable MSI-X. */
1513 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1514 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1515 	    cfg->msix.msix_ctrl, 2);
1516 
1517 	/* Update counts of alloc'd messages. */
1518 	cfg->msix.msix_alloc = actual;
1519 	cfg->msix.msix_table_len = actual;
1520 	*count = actual;
1521 	return (0);
1522 }
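/*
 * Example (sketch, hypothetical driver code): the usual MSI-X
 * allocation sequence.  The BAR containing the MSI-X table must
 * already be mapped RF_ACTIVE, as checked above.
 */
#if 0
	struct resource *irq_res;
	int count, rid;

	count = pci_msix_count(dev);	/* vectors the device advertises */
	if (count > 0 && pci_alloc_msix(dev, &count) == 0) {
		rid = 1;		/* MSI-X rids start at 1 */
		irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_ACTIVE);
	}
#endif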
1523 
1524 /*
1525  * By default, pci_alloc_msix() will assign the allocated IRQ
1526  * resources consecutively to the first N messages in the MSI-X table.
1527  * However, device drivers may want to use different layouts if they
1528  * either receive fewer messages than they asked for, or they wish to
1529  * populate the MSI-X table sparsely.  This method allows the driver
1530  * to specify what layout it wants.  It must be called after a
1531  * successful pci_alloc_msix() but before any of the associated
1532  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1533  *
1534  * The 'vectors' array contains 'count' message vectors.  The array
1535  * maps directly to the MSI-X table in that index 0 in the array
1536  * specifies the vector for the first message in the MSI-X table, etc.
1537  * The vector value in each array index can either be 0 to indicate
1538  * that no vector should be assigned to a message slot, or it can be a
1539  * number from 1 to N (where N is the count returned from a
1540  * successful call to pci_alloc_msix()) to indicate which message
1541  * vector (IRQ) should be used for the corresponding message.
1542  *
1543  * On successful return, each message with a non-zero vector will have
1544  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1545  * 1.  Additionally, if any of the IRQs allocated via the previous
1546  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1547  * will be freed back to the system automatically.
1548  *
1549  * For example, suppose a driver has a MSI-X table with 6 messages and
1550  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1551  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1552  * C.  After the call to pci_alloc_msix(), the device will be set up to
1553  * have an MSI-X table of ABC--- (where - means no vector assigned).
1554  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1555  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1556  * be freed back to the system.  This device will also have valid
1557  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1558  *
1559  * In any case, the SYS_RES_IRQ rid X will always map to the message
1560  * at MSI-X table index X - 1 and will only be valid if a vector is
1561  * assigned to that table entry.
1562  */
1563 int
1564 pci_remap_msix_method(device_t dev, device_t child, int count,
1565     const u_int *vectors)
1566 {
1567 	struct pci_devinfo *dinfo = device_get_ivars(child);
1568 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1569 	struct resource_list_entry *rle;
1570 	int i, irq, j, *used;
1571 
1572 	/*
1573 	 * Have to have at least one message in the table but the
1574 	 * table can't be bigger than the actual MSI-X table in the
1575 	 * device.
1576 	 */
1577 	if (count == 0 || count > msix->msix_msgnum)
1578 		return (EINVAL);
1579 
1580 	/* Sanity check the vectors. */
1581 	for (i = 0; i < count; i++)
1582 		if (vectors[i] > msix->msix_alloc)
1583 			return (EINVAL);
1584 
1585 	/*
1586 	 * Make sure there aren't any holes in the vectors to be used.
1587 	 * It's a big pain to support it, and it doesn't really make
1588 	 * sense anyway.  Also, at least one vector must be used.
1589 	 */
1590 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1591 	    M_ZERO);
1592 	for (i = 0; i < count; i++)
1593 		if (vectors[i] != 0)
1594 			used[vectors[i] - 1] = 1;
1595 	for (i = 0; i < msix->msix_alloc - 1; i++)
1596 		if (used[i] == 0 && used[i + 1] == 1) {
1597 			free(used, M_DEVBUF);
1598 			return (EINVAL);
1599 		}
1600 	if (used[0] != 1) {
1601 		free(used, M_DEVBUF);
1602 		return (EINVAL);
1603 	}
1604 
1605 	/* Make sure none of the resources are allocated. */
1606 	for (i = 0; i < msix->msix_table_len; i++) {
1607 		if (msix->msix_table[i].mte_vector == 0)
1608 			continue;
1609 		if (msix->msix_table[i].mte_handlers > 0)
1610 			return (EBUSY);
1611 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1612 		KASSERT(rle != NULL, ("missing resource"));
1613 		if (rle->res != NULL)
1614 			return (EBUSY);
1615 	}
1616 
1617 	/* Free the existing resource list entries. */
1618 	for (i = 0; i < msix->msix_table_len; i++) {
1619 		if (msix->msix_table[i].mte_vector == 0)
1620 			continue;
1621 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1622 	}
1623 
1624 	/*
1625 	 * Build the new virtual table keeping track of which vectors are
1626 	 * used.
1627 	 */
1628 	free(msix->msix_table, M_DEVBUF);
1629 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1630 	    M_DEVBUF, M_WAITOK | M_ZERO);
1631 	for (i = 0; i < count; i++)
1632 		msix->msix_table[i].mte_vector = vectors[i];
1633 	msix->msix_table_len = count;
1634 
1635 	/* Free any unused IRQs and resize the vectors array if necessary. */
1636 	j = msix->msix_alloc - 1;
1637 	if (used[j] == 0) {
1638 		struct msix_vector *vec;
1639 
1640 		while (used[j] == 0) {
1641 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1642 			    msix->msix_vectors[j].mv_irq);
1643 			j--;
1644 		}
1645 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1646 		    M_WAITOK);
1647 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1648 		    (j + 1));
1649 		free(msix->msix_vectors, M_DEVBUF);
1650 		msix->msix_vectors = vec;
1651 		msix->msix_alloc = j + 1;
1652 	}
1653 	free(used, M_DEVBUF);
1654 
1655 	/* Map the IRQs onto the rids. */
1656 	for (i = 0; i < count; i++) {
1657 		if (vectors[i] == 0)
1658 			continue;
1659 		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1660 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1661 		    irq, 1);
1662 	}
1663 
1664 	if (bootverbose) {
1665 		device_printf(child, "Remapped MSI-X IRQs as: ");
1666 		for (i = 0; i < count; i++) {
1667 			if (i != 0)
1668 				printf(", ");
1669 			if (vectors[i] == 0)
1670 				printf("---");
1671 			else
1672 				printf("%d",
1673 				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1674 		}
1675 		printf("\n");
1676 	}
1677 
1678 	return (0);
1679 }
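/*
 * Example (sketch): the remap described in the comment above.  With
 * three vectors from pci_alloc_msix(), this call spreads them over a
 * six-entry MSI-X table (A-AB-B) and releases the unused third vector.
 */
#if 0
	static const u_int map[] = { 1, 0, 1, 2, 0, 2 };
	int error;

	error = pci_remap_msix(dev, 6, map);	/* rids 1, 3, 4, 6 valid */
#endif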
1680 
1681 static int
1682 pci_release_msix(device_t dev, device_t child)
1683 {
1684 	struct pci_devinfo *dinfo = device_get_ivars(child);
1685 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1686 	struct resource_list_entry *rle;
1687 	int i;
1688 
1689 	/* Do we have any messages to release? */
1690 	if (msix->msix_alloc == 0)
1691 		return (ENODEV);
1692 
1693 	/* Make sure none of the resources are allocated. */
1694 	for (i = 0; i < msix->msix_table_len; i++) {
1695 		if (msix->msix_table[i].mte_vector == 0)
1696 			continue;
1697 		if (msix->msix_table[i].mte_handlers > 0)
1698 			return (EBUSY);
1699 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1700 		KASSERT(rle != NULL, ("missing resource"));
1701 		if (rle->res != NULL)
1702 			return (EBUSY);
1703 	}
1704 
1705 	/* Update control register to disable MSI-X. */
1706 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1707 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1708 	    msix->msix_ctrl, 2);
1709 
1710 	/* Free the resource list entries. */
1711 	for (i = 0; i < msix->msix_table_len; i++) {
1712 		if (msix->msix_table[i].mte_vector == 0)
1713 			continue;
1714 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1715 	}
1716 	free(msix->msix_table, M_DEVBUF);
1717 	msix->msix_table_len = 0;
1718 
1719 	/* Release the IRQs. */
1720 	for (i = 0; i < msix->msix_alloc; i++)
1721 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1722 		    msix->msix_vectors[i].mv_irq);
1723 	free(msix->msix_vectors, M_DEVBUF);
1724 	msix->msix_alloc = 0;
1725 	return (0);
1726 }
1727 
1728 /*
1729  * Return the maximum number of MSI-X messages this device supports.
1730  * Basically, assuming the MD code can alloc messages, this function
1731  * should return the maximum value that pci_alloc_msix() can return.
1732  * Thus, it is subject to the tunables, etc.
1733  */
1734 int
1735 pci_msix_count_method(device_t dev, device_t child)
1736 {
1737 	struct pci_devinfo *dinfo = device_get_ivars(child);
1738 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1739 
1740 	if (pci_do_msix && msix->msix_location != 0)
1741 		return (msix->msix_msgnum);
1742 	return (0);
1743 }
1744 
1745 /*
1746  * HyperTransport MSI mapping control
1747  */
1748 void
1749 pci_ht_map_msi(device_t dev, uint64_t addr)
1750 {
1751 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1752 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1753 
1754 	if (!ht->ht_msimap)
1755 		return;
1756 
1757 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1758 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1759 		/* Enable MSI -> HT mapping. */
1760 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1761 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1762 		    ht->ht_msictrl, 2);
1763 	}
1764 
1765 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1766 		/* Disable MSI -> HT mapping. */
1767 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1768 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1769 		    ht->ht_msictrl, 2);
1770 	}
1771 }
1772 
1773 int
1774 pci_get_max_read_req(device_t dev)
1775 {
1776 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1777 	int cap;
1778 	uint16_t val;
1779 
1780 	cap = dinfo->cfg.pcie.pcie_location;
1781 	if (cap == 0)
1782 		return (0);
1783 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1784 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1785 	val >>= 12;
1786 	return (1 << (val + 7));
1787 }
1788 
1789 int
1790 pci_set_max_read_req(device_t dev, int size)
1791 {
1792 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1793 	int cap;
1794 	uint16_t val;
1795 
1796 	cap = dinfo->cfg.pcie.pcie_location;
1797 	if (cap == 0)
1798 		return (0);
1799 	if (size < 128)
1800 		size = 128;
1801 	if (size > 4096)
1802 		size = 4096;
1803 	size = (1 << (fls(size) - 1));
1804 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1805 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1806 	val |= (fls(size) - 8) << 12;
1807 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1808 	return (size);
1809 }
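
/*
 * Worked example of the encoding used above: bits 14:12 of the PCIe
 * device control register hold the Max_Read_Request_Size field, which
 * encodes a size of 2^(field + 7) bytes, so field values 0 through 5
 * select 128, 256, 512, 1024, 2048 and 4096 bytes.  A call such as
 *
 *	pci_set_max_read_req(dev, 1000);
 *
 * rounds 1000 down to the power of two 512 (1 << (fls(1000) - 1)),
 * stores (fls(512) - 8) == 2 in bits 14:12 and returns 512.
 */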
1810 
1811 /*
1812  * Support for MSI message signalled interrupts.
1813  */
1814 void
1815 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1816 {
1817 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1818 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1819 
1820 	/* Write data and address values. */
1821 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1822 	    address & 0xffffffff, 4);
1823 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1824 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1825 		    address >> 32, 4);
1826 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1827 		    data, 2);
1828 	} else
1829 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1830 		    2);
1831 
1832 	/* Enable MSI in the control register. */
1833 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1834 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1835 	    2);
1836 
1837 	/* Enable MSI -> HT mapping. */
1838 	pci_ht_map_msi(dev, address);
1839 }
1840 
1841 void
1842 pci_disable_msi(device_t dev)
1843 {
1844 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1845 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1846 
1847 	/* Disable MSI -> HT mapping. */
1848 	pci_ht_map_msi(dev, 0);
1849 
1850 	/* Disable MSI in the control register. */
1851 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1852 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1853 	    2);
1854 }
1855 
1856 /*
1857  * Restore MSI registers during resume.  If MSI is enabled then
1858  * restore the data and address registers in addition to the control
1859  * register.
1860  */
1861 static void
1862 pci_resume_msi(device_t dev)
1863 {
1864 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1865 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1866 	uint64_t address;
1867 	uint16_t data;
1868 
1869 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1870 		address = msi->msi_addr;
1871 		data = msi->msi_data;
1872 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1873 		    address & 0xffffffff, 4);
1874 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1875 			pci_write_config(dev, msi->msi_location +
1876 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1877 			pci_write_config(dev, msi->msi_location +
1878 			    PCIR_MSI_DATA_64BIT, data, 2);
1879 		} else
1880 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1881 			    data, 2);
1882 	}
1883 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1884 	    2);
1885 }
1886 
1887 static int
1888 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1889 {
1890 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1891 	pcicfgregs *cfg = &dinfo->cfg;
1892 	struct resource_list_entry *rle;
1893 	struct msix_table_entry *mte;
1894 	struct msix_vector *mv;
1895 	uint64_t addr;
1896 	uint32_t data;
1897 	int error, i, j;
1898 
1899 	/*
1900 	 * Handle MSI first.  We try to find this IRQ among our list
1901 	 * of MSI IRQs.  If we find it, we request updated address and
1902 	 * data registers and apply the results.
1903 	 */
1904 	if (cfg->msi.msi_alloc > 0) {
1905 
1906 		/* If we don't have any active handlers, nothing to do. */
1907 		if (cfg->msi.msi_handlers == 0)
1908 			return (0);
1909 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1910 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1911 			    i + 1);
1912 			if (rle->start == irq) {
1913 				error = PCIB_MAP_MSI(device_get_parent(bus),
1914 				    dev, irq, &addr, &data);
1915 				if (error)
1916 					return (error);
1917 				pci_disable_msi(dev);
1918 				dinfo->cfg.msi.msi_addr = addr;
1919 				dinfo->cfg.msi.msi_data = data;
1920 				pci_enable_msi(dev, addr, data);
1921 				return (0);
1922 			}
1923 		}
1924 		return (ENOENT);
1925 	}
1926 
1927 	/*
1928 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1929 	 * we request the updated mapping info.  If that works, we go
1930 	 * through all the slots that use this IRQ and update them.
1931 	 */
1932 	if (cfg->msix.msix_alloc > 0) {
1933 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1934 			mv = &cfg->msix.msix_vectors[i];
1935 			if (mv->mv_irq == irq) {
1936 				error = PCIB_MAP_MSI(device_get_parent(bus),
1937 				    dev, irq, &addr, &data);
1938 				if (error)
1939 					return (error);
1940 				mv->mv_address = addr;
1941 				mv->mv_data = data;
1942 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1943 					mte = &cfg->msix.msix_table[j];
1944 					if (mte->mte_vector != i + 1)
1945 						continue;
1946 					if (mte->mte_handlers == 0)
1947 						continue;
1948 					pci_mask_msix(dev, j);
1949 					pci_enable_msix(dev, j, addr, data);
1950 					pci_unmask_msix(dev, j);
1951 				}
1952 			}
1953 		}
1954 		return (ENOENT);
1955 	}
1956 
1957 	return (ENOENT);
1958 }
1959 
1960 /*
1961  * Returns true if the specified device is blacklisted because MSI
1962  * doesn't work.
1963  */
1964 int
1965 pci_msi_device_blacklisted(device_t dev)
1966 {
1967 	const struct pci_quirk *q;
1968 
1969 	if (!pci_honor_msi_blacklist)
1970 		return (0);
1971 
1972 	for (q = &pci_quirks[0]; q->devid; q++) {
1973 		if (q->devid == pci_get_devid(dev) &&
1974 		    q->type == PCI_QUIRK_DISABLE_MSI)
1975 			return (1);
1976 	}
1977 	return (0);
1978 }
1979 
1980 /*
1981  * Returns true if the specified chipset supports MSI when it appears
1982  * as emulated hardware in a virtual machine.
1983  */
1984 static int
1985 pci_msi_vm_chipset(device_t dev)
1986 {
1987 	const struct pci_quirk *q;
1988 
1989 	for (q = &pci_quirks[0]; q->devid; q++) {
1990 		if (q->devid == pci_get_devid(dev) &&
1991 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1992 			return (1);
1993 	}
1994 	return (0);
1995 }
1996 
1997 /*
1998  * Determine if MSI is blacklisted globally on this system.  Currently,
1999  * we just check for blacklisted chipsets as represented by the
2000  * host-PCI bridge at device 0:0:0.  In the future, it may become
2001  * necessary to check other system attributes, such as the kenv values
2002  * that give the motherboard manufacturer and model number.
2003  */
2004 static int
2005 pci_msi_blacklisted(void)
2006 {
2007 	device_t dev;
2008 
2009 	if (!pci_honor_msi_blacklist)
2010 		return (0);
2011 
2012 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2013 	if (!(pcie_chipset || pcix_chipset)) {
2014 		if (vm_guest != VM_GUEST_NO) {
2015 			dev = pci_find_bsf(0, 0, 0);
2016 			if (dev != NULL)
2017 				return (pci_msi_vm_chipset(dev) == 0);
2018 		}
2019 		return (1);
2020 	}
2021 
2022 	dev = pci_find_bsf(0, 0, 0);
2023 	if (dev != NULL)
2024 		return (pci_msi_device_blacklisted(dev));
2025 	return (0);
2026 }
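
/*
 * The check above is gated by the hw.pci.honor_msi_blacklist tunable,
 * so a machine whose chipset is wrongly blacklisted can, for example,
 * force MSI back on from loader.conf with:
 *
 *	hw.pci.honor_msi_blacklist="0"
 */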
2027 
2028 /*
2029  * Attempt to allocate *count MSI messages.  The actual number allocated is
2030  * returned in *count.  After this function returns, each message will be
2031  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2032  */
2033 int
2034 pci_alloc_msi_method(device_t dev, device_t child, int *count)
2035 {
2036 	struct pci_devinfo *dinfo = device_get_ivars(child);
2037 	pcicfgregs *cfg = &dinfo->cfg;
2038 	struct resource_list_entry *rle;
2039 	int actual, error, i, irqs[32];
2040 	uint16_t ctrl;
2041 
2042 	/* Don't let count == 0 get us into trouble. */
2043 	if (*count == 0)
2044 		return (EINVAL);
2045 
2046 	/* If rid 0 is allocated, then fail. */
2047 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2048 	if (rle != NULL && rle->res != NULL)
2049 		return (ENXIO);
2050 
2051 	/* Already have allocated messages? */
2052 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2053 		return (ENXIO);
2054 
2055 	/* If MSI is blacklisted for this system, fail. */
2056 	if (pci_msi_blacklisted())
2057 		return (ENXIO);
2058 
2059 	/* MSI capability present? */
2060 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2061 		return (ENODEV);
2062 
2063 	if (bootverbose)
2064 		device_printf(child,
2065 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2066 		    *count, cfg->msi.msi_msgnum);
2067 
2068 	/* Don't ask for more than the device supports. */
2069 	actual = min(*count, cfg->msi.msi_msgnum);
2070 
2071 	/* Don't ask for more than 32 messages. */
2072 	actual = min(actual, 32);
2073 
2074 	/* MSI requires a power-of-2 number of messages. */
2075 	if (!powerof2(actual))
2076 		return (EINVAL);
2077 
2078 	for (;;) {
2079 		/* Try to allocate N messages. */
2080 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2081 		    actual, irqs);
2082 		if (error == 0)
2083 			break;
2084 		if (actual == 1)
2085 			return (error);
2086 
2087 		/* Try N / 2. */
2088 		actual >>= 1;
2089 	}
2090 
2091 	/*
2092 	 * We now have N actual messages mapped onto SYS_RES_IRQ
2093 	 * resources in the irqs[] array, so add new resources
2094 	 * starting at rid 1.
2095 	 */
2096 	for (i = 0; i < actual; i++)
2097 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2098 		    irqs[i], irqs[i], 1);
2099 
2100 	if (bootverbose) {
2101 		if (actual == 1)
2102 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2103 		else {
2104 			int run;
2105 
2106 			/*
2107 			 * Be fancy and try to print contiguous runs
2108 			 * of IRQ values as ranges.  'run' is true if
2109 			 * we are in a range.
2110 			 */
2111 			device_printf(child, "using IRQs %d", irqs[0]);
2112 			run = 0;
2113 			for (i = 1; i < actual; i++) {
2114 
2115 				/* Still in a run? */
2116 				if (irqs[i] == irqs[i - 1] + 1) {
2117 					run = 1;
2118 					continue;
2119 				}
2120 
2121 				/* Finish previous range. */
2122 				if (run) {
2123 					printf("-%d", irqs[i - 1]);
2124 					run = 0;
2125 				}
2126 
2127 				/* Start new range. */
2128 				printf(",%d", irqs[i]);
2129 			}
2130 
2131 			/* Unfinished range? */
2132 			if (run)
2133 				printf("-%d", irqs[actual - 1]);
2134 			printf(" for MSI\n");
2135 		}
2136 	}
2137 
2138 	/* Update control register with actual count. */
2139 	ctrl = cfg->msi.msi_ctrl;
2140 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2141 	ctrl |= (ffs(actual) - 1) << 4;
2142 	cfg->msi.msi_ctrl = ctrl;
2143 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2144 
2145 	/* Update counts of alloc'd messages. */
2146 	cfg->msi.msi_alloc = actual;
2147 	cfg->msi.msi_handlers = 0;
2148 	*count = actual;
2149 	return (0);
2150 }
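
/*
 * Illustrative sketch, hypothetical driver code: the usual caller of
 * the method above is a driver attach routine, which negotiates a
 * message count and then allocates the resulting rid-1 IRQ resource:
 *
 *	int count = 1, rid = 1;
 *	struct resource *irq;
 *
 *	if (pci_alloc_msi(dev, &count) == 0) {
 *		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 *		    RF_ACTIVE);
 *		if (irq == NULL)
 *			pci_release_msi(dev);	(fall back to INTx, rid 0)
 *	}
 */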
2151 
2152 /* Release the MSI messages associated with this device. */
2153 int
2154 pci_release_msi_method(device_t dev, device_t child)
2155 {
2156 	struct pci_devinfo *dinfo = device_get_ivars(child);
2157 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2158 	struct resource_list_entry *rle;
2159 	int error, i, irqs[32];
2160 
2161 	/* Try MSI-X first. */
2162 	error = pci_release_msix(dev, child);
2163 	if (error != ENODEV)
2164 		return (error);
2165 
2166 	/* Do we have any messages to release? */
2167 	if (msi->msi_alloc == 0)
2168 		return (ENODEV);
2169 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2170 
2171 	/* Make sure none of the resources are allocated. */
2172 	if (msi->msi_handlers > 0)
2173 		return (EBUSY);
2174 	for (i = 0; i < msi->msi_alloc; i++) {
2175 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2176 		KASSERT(rle != NULL, ("missing MSI resource"));
2177 		if (rle->res != NULL)
2178 			return (EBUSY);
2179 		irqs[i] = rle->start;
2180 	}
2181 
2182 	/* Update control register with 0 count. */
2183 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2184 	    ("%s: MSI still enabled", __func__));
2185 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2186 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2187 	    msi->msi_ctrl, 2);
2188 
2189 	/* Release the messages. */
2190 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
2191 	for (i = 0; i < msi->msi_alloc; i++)
2192 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2193 
2194 	/* Update alloc count. */
2195 	msi->msi_alloc = 0;
2196 	msi->msi_addr = 0;
2197 	msi->msi_data = 0;
2198 	return (0);
2199 }
2200 
2201 /*
2202  * Return the maximum number of MSI messages this device supports.
2203  * Basically, assuming the MD code can alloc messages, this function
2204  * should return the maximum value that pci_alloc_msi() can return.
2205  * Thus, it is subject to the tunables, etc.
2206  */
2207 int
2208 pci_msi_count_method(device_t dev, device_t child)
2209 {
2210 	struct pci_devinfo *dinfo = device_get_ivars(child);
2211 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2212 
2213 	if (pci_do_msi && msi->msi_location != 0)
2214 		return (msi->msi_msgnum);
2215 	return (0);
2216 }
2217 
2218 /* Free the pcicfgregs structure and all dependent data structures. */
2219 
2220 int
2221 pci_freecfg(struct pci_devinfo *dinfo)
2222 {
2223 	struct devlist *devlist_head;
2224 	struct pci_map *pm, *next;
2225 	int i;
2226 
2227 	devlist_head = &pci_devq;
2228 
2229 	if (dinfo->cfg.vpd.vpd_reg) {
2230 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2231 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2232 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2233 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2234 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2235 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2236 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2237 	}
2238 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2239 		free(pm, M_DEVBUF);
2240 	}
2241 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2242 	free(dinfo, M_DEVBUF);
2243 
2244 	/* increment the generation count */
2245 	pci_generation++;
2246 
2247 	/* we're losing one device */
2248 	pci_numdevs--;
2249 	return (0);
2250 }
2251 
2252 /*
2253  * PCI power management
2254  */
2255 int
2256 pci_set_powerstate_method(device_t dev, device_t child, int state)
2257 {
2258 	struct pci_devinfo *dinfo = device_get_ivars(child);
2259 	pcicfgregs *cfg = &dinfo->cfg;
2260 	uint16_t status;
2261 	int result, oldstate, highest, delay;
2262 
2263 	if (cfg->pp.pp_cap == 0)
2264 		return (EOPNOTSUPP);
2265 
2266 	/*
2267 	 * Optimize a no state change request away.  While it would be OK to
2268 	 * write to the hardware in theory, some devices have shown odd
2269 	 * behavior when going from D3 -> D3.
2270 	 */
2271 	oldstate = pci_get_powerstate(child);
2272 	if (oldstate == state)
2273 		return (0);
2274 
2275 	/*
2276 	 * The PCI power management specification states that after a state
2277 	 * transition between PCI power states, system software must
2278 	 * guarantee a minimal delay before the function accesses the device.
2279 	 * Compute the worst case delay that we need to guarantee before we
2280 	 * access the device.  Many devices will be responsive much more
2281 	 * quickly than this delay, but there are some that don't respond
2282 	 * instantly to state changes.  Transitions to/from D3 state require
2283 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2284 	 * is done below with DELAY rather than a sleeper function because
2285 	 * this function can be called from contexts where we cannot sleep.
2286 	 */
2287 	highest = (oldstate > state) ? oldstate : state;
2288 	if (highest == PCI_POWERSTATE_D3)
2289 	    delay = 10000;
2290 	else if (highest == PCI_POWERSTATE_D2)
2291 	    delay = 200;
2292 	else
2293 	    delay = 0;
2294 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2295 	    & ~PCIM_PSTAT_DMASK;
2296 	result = 0;
2297 	switch (state) {
2298 	case PCI_POWERSTATE_D0:
2299 		status |= PCIM_PSTAT_D0;
2300 		break;
2301 	case PCI_POWERSTATE_D1:
2302 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2303 			return (EOPNOTSUPP);
2304 		status |= PCIM_PSTAT_D1;
2305 		break;
2306 	case PCI_POWERSTATE_D2:
2307 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2308 			return (EOPNOTSUPP);
2309 		status |= PCIM_PSTAT_D2;
2310 		break;
2311 	case PCI_POWERSTATE_D3:
2312 		status |= PCIM_PSTAT_D3;
2313 		break;
2314 	default:
2315 		return (EINVAL);
2316 	}
2317 
2318 	if (bootverbose)
2319 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2320 		    state);
2321 
2322 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2323 	if (delay)
2324 		DELAY(delay);
2325 	return (0);
2326 }
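
/*
 * Illustrative sketch, hypothetical driver code: a driver that manages
 * its own power typically reaches this method through the
 * pci_set_powerstate() wrapper from its suspend and resume routines:
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *		(save volatile device state here)
 *		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
 *		return (0);
 *	}
 *
 * with the matching resume calling pci_set_powerstate(dev,
 * PCI_POWERSTATE_D0) before touching any device registers.  Most
 * drivers can instead rely on the bus doing this for them (see
 * pci_set_power_children() below).
 */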
2327 
2328 int
2329 pci_get_powerstate_method(device_t dev, device_t child)
2330 {
2331 	struct pci_devinfo *dinfo = device_get_ivars(child);
2332 	pcicfgregs *cfg = &dinfo->cfg;
2333 	uint16_t status;
2334 	int result;
2335 
2336 	if (cfg->pp.pp_cap != 0) {
2337 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2338 		switch (status & PCIM_PSTAT_DMASK) {
2339 		case PCIM_PSTAT_D0:
2340 			result = PCI_POWERSTATE_D0;
2341 			break;
2342 		case PCIM_PSTAT_D1:
2343 			result = PCI_POWERSTATE_D1;
2344 			break;
2345 		case PCIM_PSTAT_D2:
2346 			result = PCI_POWERSTATE_D2;
2347 			break;
2348 		case PCIM_PSTAT_D3:
2349 			result = PCI_POWERSTATE_D3;
2350 			break;
2351 		default:
2352 			result = PCI_POWERSTATE_UNKNOWN;
2353 			break;
2354 		}
2355 	} else {
2356 		/* No support, device is always at D0 */
2357 		result = PCI_POWERSTATE_D0;
2358 	}
2359 	return (result);
2360 }
2361 
2362 /*
2363  * Some convenience functions for PCI device drivers.
2364  */
2365 
2366 static __inline void
2367 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2368 {
2369 	uint16_t	command;
2370 
2371 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2372 	command |= bit;
2373 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2374 }
2375 
2376 static __inline void
2377 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2378 {
2379 	uint16_t	command;
2380 
2381 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2382 	command &= ~bit;
2383 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2384 }
2385 
2386 int
2387 pci_enable_busmaster_method(device_t dev, device_t child)
2388 {
2389 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2390 	return (0);
2391 }
2392 
2393 int
2394 pci_disable_busmaster_method(device_t dev, device_t child)
2395 {
2396 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2397 	return (0);
2398 }
2399 
2400 int
2401 pci_enable_io_method(device_t dev, device_t child, int space)
2402 {
2403 	uint16_t bit;
2404 
2405 	switch(space) {
2406 	case SYS_RES_IOPORT:
2407 		bit = PCIM_CMD_PORTEN;
2408 		break;
2409 	case SYS_RES_MEMORY:
2410 		bit = PCIM_CMD_MEMEN;
2411 		break;
2412 	default:
2413 		return (EINVAL);
2414 	}
2415 	pci_set_command_bit(dev, child, bit);
2416 	return (0);
2417 }
2418 
2419 int
2420 pci_disable_io_method(device_t dev, device_t child, int space)
2421 {
2422 	uint16_t bit;
2423 
2424 	switch(space) {
2425 	case SYS_RES_IOPORT:
2426 		bit = PCIM_CMD_PORTEN;
2427 		break;
2428 	case SYS_RES_MEMORY:
2429 		bit = PCIM_CMD_MEMEN;
2430 		break;
2431 	default:
2432 		return (EINVAL);
2433 	}
2434 	pci_clear_command_bit(dev, child, bit);
2435 	return (0);
2436 }
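
/*
 * Illustrative sketch: the wrappers for the methods above are normally
 * called from a driver's attach routine before any DMA or register
 * access is attempted, e.g.:
 *
 *	pci_enable_busmaster(dev);
 *	pci_enable_io(dev, SYS_RES_MEMORY);
 *
 * Both are cheap read-modify-write cycles on the command register and
 * are safe to repeat.
 */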
2437 
2438 /*
2439  * New style pci driver.  Parent device is either a pci-host-bridge or a
2440  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2441  */
2442 
2443 void
2444 pci_print_verbose(struct pci_devinfo *dinfo)
2445 {
2446 
2447 	if (bootverbose) {
2448 		pcicfgregs *cfg = &dinfo->cfg;
2449 
2450 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2451 		    cfg->vendor, cfg->device, cfg->revid);
2452 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2453 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2454 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2455 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2456 		    cfg->mfdev);
2457 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2458 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2459 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2460 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2461 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2462 		if (cfg->intpin > 0)
2463 			printf("\tintpin=%c, irq=%d\n",
2464 			    cfg->intpin +'a' -1, cfg->intline);
2465 		if (cfg->pp.pp_cap) {
2466 			uint16_t status;
2467 
2468 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2469 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2470 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2471 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2472 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2473 			    status & PCIM_PSTAT_DMASK);
2474 		}
2475 		if (cfg->msi.msi_location) {
2476 			int ctrl;
2477 
2478 			ctrl = cfg->msi.msi_ctrl;
2479 			printf("\tMSI supports %d message%s%s%s\n",
2480 			    cfg->msi.msi_msgnum,
2481 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2482 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2483 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2484 		}
2485 		if (cfg->msix.msix_location) {
2486 			printf("\tMSI-X supports %d message%s ",
2487 			    cfg->msix.msix_msgnum,
2488 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2489 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2490 				printf("in map 0x%x\n",
2491 				    cfg->msix.msix_table_bar);
2492 			else
2493 				printf("in maps 0x%x and 0x%x\n",
2494 				    cfg->msix.msix_table_bar,
2495 				    cfg->msix.msix_pba_bar);
2496 		}
2497 	}
2498 }
2499 
2500 static int
2501 pci_porten(device_t dev)
2502 {
2503 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2504 }
2505 
2506 static int
2507 pci_memen(device_t dev)
2508 {
2509 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2510 }
2511 
2512 static void
2513 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2514 {
2515 	struct pci_devinfo *dinfo;
2516 	pci_addr_t map, testval;
2517 	int ln2range;
2518 	uint16_t cmd;
2519 
2520 	/*
2521 	 * The device ROM BAR is special.  It is always a 32-bit
2522 	 * memory BAR.  Bit 0 is the enable bit and should not be set when
2523 	 * sizing the BAR.
2524 	 */
2525 	dinfo = device_get_ivars(dev);
2526 	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
2527 		map = pci_read_config(dev, reg, 4);
2528 		pci_write_config(dev, reg, 0xfffffffe, 4);
2529 		testval = pci_read_config(dev, reg, 4);
2530 		pci_write_config(dev, reg, map, 4);
2531 		*mapp = map;
2532 		*testvalp = testval;
2533 		return;
2534 	}
2535 
2536 	map = pci_read_config(dev, reg, 4);
2537 	ln2range = pci_maprange(map);
2538 	if (ln2range == 64)
2539 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2540 
2541 	/*
2542 	 * Disable decoding via the command register before
2543 	 * determining the BAR's length since we will be placing it in
2544 	 * a weird state.
2545 	 */
2546 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2547 	pci_write_config(dev, PCIR_COMMAND,
2548 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2549 
2550 	/*
2551 	 * Determine the BAR's length by writing all 1's.  The bottom
2552 	 * log_2(size) bits of the BAR will stick as 0 when we read
2553 	 * the value back.
2554 	 */
2555 	pci_write_config(dev, reg, 0xffffffff, 4);
2556 	testval = pci_read_config(dev, reg, 4);
2557 	if (ln2range == 64) {
2558 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2559 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2560 	}
2561 
2562 	/*
2563 	 * Restore the original value of the BAR.  We may have reprogrammed
2564 	 * the BAR of the low-level console device and when booting verbose,
2565 	 * we need the console device addressable.
2566 	 */
2567 	pci_write_config(dev, reg, map, 4);
2568 	if (ln2range == 64)
2569 		pci_write_config(dev, reg + 4, map >> 32, 4);
2570 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2571 
2572 	*mapp = map;
2573 	*testvalp = testval;
2574 }
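
/*
 * Worked example of the sizing logic above: a 32-bit, non-prefetchable
 * memory BAR that reads back as 0xffffc000 after all 1's are written
 * has bits 13:4 stuck at zero, so pci_mapsize() reports a log2 size of
 * 14 and the BAR decodes a 16KB (1 << 14) window.
 */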
2575 
2576 static void
2577 pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
2578 {
2579 	struct pci_devinfo *dinfo;
2580 	int ln2range;
2581 
2582 	/* The device ROM BAR is always a 32-bit memory BAR. */
2583 	dinfo = device_get_ivars(dev);
2584 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2585 		ln2range = 32;
2586 	else
2587 		ln2range = pci_maprange(pm->pm_value);
2588 	pci_write_config(dev, pm->pm_reg, base, 4);
2589 	if (ln2range == 64)
2590 		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
2591 	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
2592 	if (ln2range == 64)
2593 		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
2594 		    pm->pm_reg + 4, 4) << 32;
2595 }
2596 
2597 struct pci_map *
2598 pci_find_bar(device_t dev, int reg)
2599 {
2600 	struct pci_devinfo *dinfo;
2601 	struct pci_map *pm;
2602 
2603 	dinfo = device_get_ivars(dev);
2604 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2605 		if (pm->pm_reg == reg)
2606 			return (pm);
2607 	}
2608 	return (NULL);
2609 }
2610 
2611 int
2612 pci_bar_enabled(device_t dev, struct pci_map *pm)
2613 {
2614 	struct pci_devinfo *dinfo;
2615 	uint16_t cmd;
2616 
2617 	dinfo = device_get_ivars(dev);
2618 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2619 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2620 		return (0);
2621 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2622 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2623 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2624 	else
2625 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2626 }
2627 
2628 static struct pci_map *
2629 pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2630 {
2631 	struct pci_devinfo *dinfo;
2632 	struct pci_map *pm, *prev;
2633 
2634 	dinfo = device_get_ivars(dev);
2635 	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2636 	pm->pm_reg = reg;
2637 	pm->pm_value = value;
2638 	pm->pm_size = size;
2639 	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2640 		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2641 		    reg));
2642 		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2643 		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2644 			break;
2645 	}
2646 	if (prev != NULL)
2647 		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2648 	else
2649 		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2650 	return (pm);
2651 }
2652 
2653 static void
2654 pci_restore_bars(device_t dev)
2655 {
2656 	struct pci_devinfo *dinfo;
2657 	struct pci_map *pm;
2658 	int ln2range;
2659 
2660 	dinfo = device_get_ivars(dev);
2661 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2662 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2663 			ln2range = 32;
2664 		else
2665 			ln2range = pci_maprange(pm->pm_value);
2666 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2667 		if (ln2range == 64)
2668 			pci_write_config(dev, pm->pm_reg + 4,
2669 			    pm->pm_value >> 32, 4);
2670 	}
2671 }
2672 
2673 /*
2674  * Add a resource based on a pci map register. Return 1 if the map
2675  * register is a 32bit map register or 2 if it is a 64bit register.
2676  */
2677 static int
2678 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2679     int force, int prefetch)
2680 {
2681 	struct pci_map *pm;
2682 	pci_addr_t base, map, testval;
2683 	pci_addr_t start, end, count;
2684 	int barlen, basezero, maprange, mapsize, type;
2685 	uint16_t cmd;
2686 	struct resource *res;
2687 
2688 	/*
2689 	 * The BAR may already exist if the device is a CardBus card
2690 	 * whose CIS is stored in this BAR.
2691 	 */
2692 	pm = pci_find_bar(dev, reg);
2693 	if (pm != NULL) {
2694 		maprange = pci_maprange(pm->pm_value);
2695 		barlen = maprange == 64 ? 2 : 1;
2696 		return (barlen);
2697 	}
2698 
2699 	pci_read_bar(dev, reg, &map, &testval);
2700 	if (PCI_BAR_MEM(map)) {
2701 		type = SYS_RES_MEMORY;
2702 		if (map & PCIM_BAR_MEM_PREFETCH)
2703 			prefetch = 1;
2704 	} else
2705 		type = SYS_RES_IOPORT;
2706 	mapsize = pci_mapsize(testval);
2707 	base = pci_mapbase(map);
2708 #ifdef __PCI_BAR_ZERO_VALID
2709 	basezero = 0;
2710 #else
2711 	basezero = base == 0;
2712 #endif
2713 	maprange = pci_maprange(map);
2714 	barlen = maprange == 64 ? 2 : 1;
2715 
2716 	/*
2717 	 * For I/O registers, if bottom bit is set, and the next bit up
2718 	 * isn't clear, we know we have a BAR that doesn't conform to the
2719 	 * spec, so ignore it.  Also, sanity check the size of the data
2720 	 * areas against the resource type involved.  Memory ranges must be
2721 	 * at least 16 bytes in size, while I/O ranges must be at least 4.
2722 	 */
2723 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2724 		return (barlen);
2725 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2726 	    (type == SYS_RES_IOPORT && mapsize < 2))
2727 		return (barlen);
2728 
2729 	/* Save a record of this BAR. */
2730 	pm = pci_add_bar(dev, reg, map, mapsize);
2731 	if (bootverbose) {
2732 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2733 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2734 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2735 			printf(", port disabled\n");
2736 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2737 			printf(", memory disabled\n");
2738 		else
2739 			printf(", enabled\n");
2740 	}
2741 
2742 	/*
2743 	 * If base is 0, then we have problems if this architecture does
2744 	 * not allow that.  It is best to ignore such entries for the
2745 	 * moment.  These will be allocated later if the driver specifically
2746 	 * requests them.  However, some removable busses look better when
2747  * all resources are allocated, so allow '0' to be overridden.
2748 	 *
2749  * Similarly treat maps whose value is the same as the test value
2750 	 * read back.  These maps have had all f's written to them by the
2751 	 * BIOS in an attempt to disable the resources.
2752 	 */
2753 	if (!force && (basezero || map == testval))
2754 		return (barlen);
2755 	if ((u_long)base != base) {
2756 		device_printf(bus,
2757 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
2758 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2759 		    pci_get_function(dev), reg);
2760 		return (barlen);
2761 	}
2762 
2763 	/*
2764 	 * This code theoretically does the right thing, but has
2765 	 * undesirable side effects in some cases where peripherals
2766 	 * respond oddly to having these bits enabled.  Allow the user
2767 	 * to turn them off (since pci_enable_io_modes is 1 by
2768 	 * default).
2769 	 */
2770 	if (pci_enable_io_modes) {
2771 		/* Turn on resources that have been left off by a lazy BIOS */
2772 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2773 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2774 			cmd |= PCIM_CMD_PORTEN;
2775 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2776 		}
2777 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2778 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2779 			cmd |= PCIM_CMD_MEMEN;
2780 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2781 		}
2782 	} else {
2783 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2784 			return (barlen);
2785 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2786 			return (barlen);
2787 	}
2788 
2789 	count = (pci_addr_t)1 << mapsize;
2790 	if (basezero || base == pci_mapbase(testval)) {
2791 		start = 0;	/* Let the parent decide. */
2792 		end = ~0ul;
2793 	} else {
2794 		start = base;
2795 		end = base + count - 1;
2796 	}
2797 	resource_list_add(rl, type, reg, start, end, count);
2798 
2799 	/*
2800 	 * Try to allocate the resource for this BAR from our parent
2801 	 * so that this resource range is already reserved.  The
2802 	 * driver for this device will later inherit this resource in
2803 	 * pci_alloc_resource().
2804 	 */
2805 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2806 	    prefetch ? RF_PREFETCHABLE : 0);
2807 	if (res == NULL) {
2808 		/*
2809 		 * If the allocation fails, clear the BAR and delete
2810 		 * the resource list entry to force
2811 		 * pci_alloc_resource() to allocate resources from the
2812 		 * parent.
2813 		 */
2814 		resource_list_delete(rl, type, reg);
2815 		start = 0;
2816 	} else
2817 		start = rman_get_start(res);
2818 	pci_write_bar(dev, pm, start);
2819 	return (barlen);
2820 }
2821 
2822 /*
2823  * For ATA devices we need to decide early what addressing mode to use.
2824  * Legacy mode demands that the primary and secondary ATA ports sit at
2825  * the same addresses that old ISA hardware did.  This dictates that we
2826  * use those addresses and ignore the BARs if we cannot set PCI native
2827  * addressing mode.
2828  */
2829 static void
2830 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2831     uint32_t prefetchmask)
2832 {
2833 	struct resource *r;
2834 	int rid, type, progif;
2835 #if 0
2836 	/* if this device supports PCI native addressing use it */
2837 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2838 	if ((progif & 0x8a) == 0x8a) {
2839 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2840 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2841 			printf("Trying ATA native PCI addressing mode\n");
2842 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2843 		}
2844 	}
2845 #endif
2846 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2847 	type = SYS_RES_IOPORT;
2848 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2849 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2850 		    prefetchmask & (1 << 0));
2851 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2852 		    prefetchmask & (1 << 1));
2853 	} else {
2854 		rid = PCIR_BAR(0);
2855 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2856 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2857 		    0x1f7, 8, 0);
2858 		rid = PCIR_BAR(1);
2859 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2860 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2861 		    0x3f6, 1, 0);
2862 	}
2863 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2864 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2865 		    prefetchmask & (1 << 2));
2866 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2867 		    prefetchmask & (1 << 3));
2868 	} else {
2869 		rid = PCIR_BAR(2);
2870 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2871 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2872 		    0x177, 8, 0);
2873 		rid = PCIR_BAR(3);
2874 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2875 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2876 		    0x376, 1, 0);
2877 	}
2878 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2879 	    prefetchmask & (1 << 4));
2880 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2881 	    prefetchmask & (1 << 5));
2882 }
2883 
2884 static void
2885 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2886 {
2887 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2888 	pcicfgregs *cfg = &dinfo->cfg;
2889 	char tunable_name[64];
2890 	int irq;
2891 
2892 	/* Has to have an intpin to have an interrupt. */
2893 	if (cfg->intpin == 0)
2894 		return;
2895 
2896 	/* Let the user override the IRQ with a tunable. */
2897 	irq = PCI_INVALID_IRQ;
2898 	snprintf(tunable_name, sizeof(tunable_name),
2899 	    "hw.pci%d.%d.%d.INT%c.irq",
2900 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2901 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2902 		irq = PCI_INVALID_IRQ;
2903 
2904 	/*
2905 	 * If we didn't get an IRQ via the tunable, then we either use the
2906 	 * IRQ value in the intline register or we ask the bus to route an
2907 	 * interrupt for us.  If force_route is true, then we only use the
2908 	 * value in the intline register if the bus was unable to assign an
2909 	 * IRQ.
2910 	 */
2911 	if (!PCI_INTERRUPT_VALID(irq)) {
2912 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2913 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2914 		if (!PCI_INTERRUPT_VALID(irq))
2915 			irq = cfg->intline;
2916 	}
2917 
2918 	/* If after all that we don't have an IRQ, just bail. */
2919 	if (!PCI_INTERRUPT_VALID(irq))
2920 		return;
2921 
2922 	/* Update the config register if it changed. */
2923 	if (irq != cfg->intline) {
2924 		cfg->intline = irq;
2925 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2926 	}
2927 
2928 	/* Add this IRQ as rid 0 interrupt resource. */
2929 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2930 }
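
/*
 * As an illustration of the tunable consulted above, the INTA# pin of
 * the device in domain 0, bus 0, slot 13 could be forced to IRQ 20
 * from loader.conf with:
 *
 *	hw.pci0.0.13.INTA.irq="20"
 *
 * The format is hw.pci<domain>.<bus>.<slot>.INT<pin>.irq; values
 * outside 1-254 are ignored.
 */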
2931 
2932 /* Perform early OHCI takeover from SMM. */
2933 static void
2934 ohci_early_takeover(device_t self)
2935 {
2936 	struct resource *res;
2937 	uint32_t ctl;
2938 	int rid;
2939 	int i;
2940 
2941 	rid = PCIR_BAR(0);
2942 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2943 	if (res == NULL)
2944 		return;
2945 
2946 	ctl = bus_read_4(res, OHCI_CONTROL);
2947 	if (ctl & OHCI_IR) {
2948 		if (bootverbose)
2949 			printf("ohci early: "
2950 			    "SMM active, request owner change\n");
2951 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2952 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2953 			DELAY(1000);
2954 			ctl = bus_read_4(res, OHCI_CONTROL);
2955 		}
2956 		if (ctl & OHCI_IR) {
2957 			if (bootverbose)
2958 				printf("ohci early: "
2959 				    "SMM does not respond, resetting\n");
2960 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2961 		}
2962 		/* Disable interrupts */
2963 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2964 	}
2965 
2966 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2967 }
2968 
2969 /* Perform early UHCI takeover from SMM. */
2970 static void
2971 uhci_early_takeover(device_t self)
2972 {
2973 	struct resource *res;
2974 	int rid;
2975 
2976 	/*
2977 	 * Set the PIRQD enable bit and switch off all the others. We don't
2978 	 * want legacy support to interfere with us.  XXX Does this also mean
2979 	 * that the BIOS won't touch the keyboard anymore if it is connected
2980 	 * to the ports of the root hub?
2981 	 */
2982 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2983 
2984 	/* Disable interrupts */
2985 	rid = PCI_UHCI_BASE_REG;
2986 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2987 	if (res != NULL) {
2988 		bus_write_2(res, UHCI_INTR, 0);
2989 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2990 	}
2991 }
2992 
2993 /* Perform early EHCI takeover from SMM. */
2994 static void
2995 ehci_early_takeover(device_t self)
2996 {
2997 	struct resource *res;
2998 	uint32_t cparams;
2999 	uint32_t eec;
3000 	uint8_t eecp;
3001 	uint8_t bios_sem;
3002 	uint8_t offs;
3003 	int rid;
3004 	int i;
3005 
3006 	rid = PCIR_BAR(0);
3007 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3008 	if (res == NULL)
3009 		return;
3010 
3011 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
3012 
3013 	/* Synchronise with the BIOS if it owns the controller. */
3014 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
3015 	    eecp = EHCI_EECP_NEXT(eec)) {
3016 		eec = pci_read_config(self, eecp, 4);
3017 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
3018 			continue;
3019 		}
3020 		bios_sem = pci_read_config(self, eecp +
3021 		    EHCI_LEGSUP_BIOS_SEM, 1);
3022 		if (bios_sem == 0) {
3023 			continue;
3024 		}
3025 		if (bootverbose)
3026 			printf("ehci early: "
3027 			    "SMM active, request owner change\n");
3028 
3029 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
3030 
3031 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
3032 			DELAY(1000);
3033 			bios_sem = pci_read_config(self, eecp +
3034 			    EHCI_LEGSUP_BIOS_SEM, 1);
3035 		}
3036 
3037 		if (bios_sem != 0) {
3038 			if (bootverbose)
3039 				printf("ehci early: "
3040 				    "SMM does not respond\n");
3041 		}
3042 		/* Disable interrupts */
3043 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
3044 		bus_write_4(res, offs + EHCI_USBINTR, 0);
3045 	}
3046 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3047 }
3048 
3049 /* Perform early XHCI takeover from SMM. */
3050 static void
3051 xhci_early_takeover(device_t self)
3052 {
3053 	struct resource *res;
3054 	uint32_t cparams;
3055 	uint32_t eec;
3056 	uint8_t eecp;
3057 	uint8_t bios_sem;
3058 	uint8_t offs;
3059 	int rid;
3060 	int i;
3061 
3062 	rid = PCIR_BAR(0);
3063 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3064 	if (res == NULL)
3065 		return;
3066 
3067 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3068 
3069 	eec = -1;
3070 
3071 	/* Synchronise with the BIOS if it owns the controller. */
3072 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3073 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3074 		eec = bus_read_4(res, eecp);
3075 
3076 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3077 			continue;
3078 
3079 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3080 		if (bios_sem == 0)
3081 			continue;
3082 
3083 		if (bootverbose)
3084 			printf("xhci early: "
3085 			    "SMM active, request owner change\n");
3086 
3087 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3088 
3089 		/* Wait a maximum of 5 seconds. */
3090 
3091 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3092 			DELAY(1000);
3093 			bios_sem = bus_read_1(res, eecp +
3094 			    XHCI_XECP_BIOS_SEM);
3095 		}
3096 
3097 		if (bios_sem != 0) {
3098 			if (bootverbose)
3099 				printf("xhci early: "
3100 				    "SMM does not respond\n");
3101 		}
3102 
3103 		/* Disable interrupts */
3104 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3105 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3106 		bus_read_4(res, offs + XHCI_USBSTS);
3107 	}
3108 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3109 }
3110 
3111 void
3112 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3113 {
3114 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3115 	pcicfgregs *cfg = &dinfo->cfg;
3116 	struct resource_list *rl = &dinfo->resources;
3117 	const struct pci_quirk *q;
3118 	int i;
3119 
3120 	/* ATA devices need special map treatment. */
3121 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3122 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3123 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3124 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3125 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3126 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3127 	else
3128 		for (i = 0; i < cfg->nummaps;)
3129 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3130 			    prefetchmask & (1 << i));
3131 
3132 	/*
3133 	 * Add additional, quirked resources.
3134 	 */
3135 	for (q = &pci_quirks[0]; q->devid; q++) {
3136 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3137 		    && q->type == PCI_QUIRK_MAP_REG)
3138 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3139 	}
3140 
3141 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3142 #ifdef __PCI_REROUTE_INTERRUPT
3143 		/*
3144 		 * Try to re-route interrupts. Sometimes the BIOS or
3145 		 * firmware may leave bogus values in these registers.
3146 		 * If the re-route fails, then just stick with what we
3147 		 * have.
3148 		 */
3149 		pci_assign_interrupt(bus, dev, 1);
3150 #else
3151 		pci_assign_interrupt(bus, dev, 0);
3152 #endif
3153 	}
3154 
3155 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3156 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3157 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3158 			xhci_early_takeover(dev);
3159 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3160 			ehci_early_takeover(dev);
3161 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3162 			ohci_early_takeover(dev);
3163 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3164 			uhci_early_takeover(dev);
3165 	}
3166 }
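
/*
 * The early USB handoff above is gated by the hw.pci.usb_early_takeover
 * tunable; setting hw.pci.usb_early_takeover="0" in loader.conf leaves
 * the controllers to the BIOS until the USB drivers attach.
 */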
3167 
3168 void
3169 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3170 {
3171 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3172 	device_t pcib = device_get_parent(dev);
3173 	struct pci_devinfo *dinfo;
3174 	int maxslots;
3175 	int s, f, pcifunchigh;
3176 	uint8_t hdrtype;
3177 
3178 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3179 	    ("dinfo_size too small"));
3180 	maxslots = PCIB_MAXSLOTS(pcib);
3181 	for (s = 0; s <= maxslots; s++) {
3182 		pcifunchigh = 0;
3183 		f = 0;
3184 		DELAY(1);
3185 		hdrtype = REG(PCIR_HDRTYPE, 1);
3186 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3187 			continue;
3188 		if (hdrtype & PCIM_MFDEV)
3189 			pcifunchigh = PCI_FUNCMAX;
3190 		for (f = 0; f <= pcifunchigh; f++) {
3191 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3192 			    dinfo_size);
3193 			if (dinfo != NULL) {
3194 				pci_add_child(dev, dinfo);
3195 			}
3196 		}
3197 	}
3198 #undef REG
3199 }
3200 
3201 void
3202 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3203 {
3204 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3205 	device_set_ivars(dinfo->cfg.dev, dinfo);
3206 	resource_list_init(&dinfo->resources);
3207 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3208 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3209 	pci_print_verbose(dinfo);
3210 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3211 }
3212 
3213 static int
3214 pci_probe(device_t dev)
3215 {
3216 
3217 	device_set_desc(dev, "PCI bus");
3218 
3219 	/* Allow other subclasses to override this driver. */
3220 	return (BUS_PROBE_GENERIC);
3221 }
3222 
3223 int
3224 pci_attach_common(device_t dev)
3225 {
3226 	struct pci_softc *sc;
3227 	int busno, domain;
3228 #ifdef PCI_DMA_BOUNDARY
3229 	int error, tag_valid;
3230 #endif
3231 
3232 	sc = device_get_softc(dev);
3233 	domain = pcib_get_domain(dev);
3234 	busno = pcib_get_bus(dev);
3235 	if (bootverbose)
3236 		device_printf(dev, "domain=%d, physical bus=%d\n",
3237 		    domain, busno);
3238 #ifdef PCI_DMA_BOUNDARY
3239 	tag_valid = 0;
3240 	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
3241 	    devclass_find("pci")) {
3242 		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
3243 		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3244 		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
3245 		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
3246 		if (error)
3247 			device_printf(dev, "Failed to create DMA tag: %d\n",
3248 			    error);
3249 		else
3250 			tag_valid = 1;
3251 	}
3252 	if (!tag_valid)
3253 #endif
3254 		sc->sc_dma_tag = bus_get_dma_tag(dev);
3255 	return (0);
3256 }
3257 
3258 static int
3259 pci_attach(device_t dev)
3260 {
3261 	int busno, domain, error;
3262 
3263 	error = pci_attach_common(dev);
3264 	if (error)
3265 		return (error);
3266 
3267 	/*
3268 	 * Since there can be multiple independently numbered PCI
3269 	 * busses on systems with multiple PCI domains, we can't use
3270 	 * the unit number to decide which bus we are probing. We ask
3271 	 * the parent pcib what our domain and bus numbers are.
3272 	 */
3273 	domain = pcib_get_domain(dev);
3274 	busno = pcib_get_bus(dev);
3275 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3276 	return (bus_generic_attach(dev));
3277 }
3278 
3279 static void
3280 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3281     int state)
3282 {
3283 	device_t child, pcib;
3284 	struct pci_devinfo *dinfo;
3285 	int dstate, i;
3286 
3287 	/*
3288 	 * Set the device to the given state.  If the firmware suggests
3289 	 * a different power state, use it instead.  If power management
3290 	 * is not present, the firmware is responsible for managing
3291 	 * device power.  Skip children that aren't attached since they
3292 	 * are handled separately.
3293 	 */
3294 	pcib = device_get_parent(dev);
3295 	for (i = 0; i < numdevs; i++) {
3296 		child = devlist[i];
3297 		dinfo = device_get_ivars(child);
3298 		dstate = state;
3299 		if (device_is_attached(child) &&
3300 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3301 			pci_set_powerstate(child, dstate);
3302 	}
3303 }
3304 
3305 int
3306 pci_suspend(device_t dev)
3307 {
3308 	device_t child, *devlist;
3309 	struct pci_devinfo *dinfo;
3310 	int error, i, numdevs;
3311 
3312 	/*
3313 	 * Save the PCI configuration space for each child and set the
3314 	 * device in the appropriate power state for this sleep state.
3315 	 */
3316 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3317 		return (error);
3318 	for (i = 0; i < numdevs; i++) {
3319 		child = devlist[i];
3320 		dinfo = device_get_ivars(child);
3321 		pci_cfg_save(child, dinfo, 0);
3322 	}
3323 
3324 	/* Suspend devices before potentially powering them down. */
3325 	error = bus_generic_suspend(dev);
3326 	if (error) {
3327 		free(devlist, M_TEMP);
3328 		return (error);
3329 	}
3330 	if (pci_do_power_suspend)
3331 		pci_set_power_children(dev, devlist, numdevs,
3332 		    PCI_POWERSTATE_D3);
3333 	free(devlist, M_TEMP);
3334 	return (0);
3335 }
3336 
3337 int
3338 pci_resume(device_t dev)
3339 {
3340 	device_t child, *devlist;
3341 	struct pci_devinfo *dinfo;
3342 	int error, i, numdevs;
3343 
3344 	/*
3345 	 * Set each child to D0 and restore its PCI configuration space.
3346 	 */
3347 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3348 		return (error);
3349 	if (pci_do_power_resume)
3350 		pci_set_power_children(dev, devlist, numdevs,
3351 		    PCI_POWERSTATE_D0);
3352 
3353 	/* Now that the device is powered up, restore its config space. */
3354 	for (i = 0; i < numdevs; i++) {
3355 		child = devlist[i];
3356 		dinfo = device_get_ivars(child);
3357 
3358 		pci_cfg_restore(child, dinfo);
3359 		if (!device_is_attached(child))
3360 			pci_cfg_save(child, dinfo, 1);
3361 	}
3362 
3363 	/*
3364 	 * Resume critical devices first, then everything else later.
3365 	 */
3366 	for (i = 0; i < numdevs; i++) {
3367 		child = devlist[i];
3368 		switch (pci_get_class(child)) {
3369 		case PCIC_DISPLAY:
3370 		case PCIC_MEMORY:
3371 		case PCIC_BRIDGE:
3372 		case PCIC_BASEPERIPH:
3373 			DEVICE_RESUME(child);
3374 			break;
3375 		}
3376 	}
3377 	for (i = 0; i < numdevs; i++) {
3378 		child = devlist[i];
3379 		switch (pci_get_class(child)) {
3380 		case PCIC_DISPLAY:
3381 		case PCIC_MEMORY:
3382 		case PCIC_BRIDGE:
3383 		case PCIC_BASEPERIPH:
3384 			break;
3385 		default:
3386 			DEVICE_RESUME(child);
3387 		}
3388 	}
3389 	free(devlist, M_TEMP);
3390 	return (0);
3391 }
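
/*
 * The D3/D0 transitions performed above are controlled by the
 * hw.pci.do_power_suspend and hw.pci.do_power_resume tunables; for
 * example, a machine whose devices misbehave when powered down across
 * suspend can set hw.pci.do_power_suspend="0" in loader.conf.
 */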
3392 
3393 static void
3394 pci_load_vendor_data(void)
3395 {
3396 	caddr_t data;
3397 	void *ptr;
3398 	size_t sz;
3399 
3400 	data = preload_search_by_type("pci_vendor_data");
3401 	if (data != NULL) {
3402 		ptr = preload_fetch_addr(data);
3403 		sz = preload_fetch_size(data);
3404 		if (ptr != NULL && sz != 0) {
3405 			pci_vendordata = ptr;
3406 			pci_vendordata_size = sz;
3407 			/* terminate the database */
3408 			pci_vendordata[pci_vendordata_size] = '\n';
3409 		}
3410 	}
3411 }
3412 
3413 void
3414 pci_driver_added(device_t dev, driver_t *driver)
3415 {
3416 	int numdevs;
3417 	device_t *devlist;
3418 	device_t child;
3419 	struct pci_devinfo *dinfo;
3420 	int i;
3421 
3422 	if (bootverbose)
3423 		device_printf(dev, "driver added\n");
3424 	DEVICE_IDENTIFY(driver, dev);
3425 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3426 		return;
3427 	for (i = 0; i < numdevs; i++) {
3428 		child = devlist[i];
3429 		if (device_get_state(child) != DS_NOTPRESENT)
3430 			continue;
3431 		dinfo = device_get_ivars(child);
3432 		pci_print_verbose(dinfo);
3433 		if (bootverbose)
3434 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3435 		pci_cfg_restore(child, dinfo);
3436 		if (device_probe_and_attach(child) != 0)
3437 			pci_cfg_save(child, dinfo, 1);
3438 	}
3439 	free(devlist, M_TEMP);
3440 }
3441 
3442 int
3443 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3444     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
3445 {
3446 	struct pci_devinfo *dinfo;
3447 	struct msix_table_entry *mte;
3448 	struct msix_vector *mv;
3449 	uint64_t addr;
3450 	uint32_t data;
3451 	void *cookie;
3452 	int error, rid;
3453 
3454 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
3455 	    arg, &cookie);
3456 	if (error)
3457 		return (error);
3458 
3459 	/* If this is not a direct child, just bail out. */
3460 	if (device_get_parent(child) != dev) {
3461 		*cookiep = cookie;
3462 		return(0);
3463 	}
3464 
3465 	rid = rman_get_rid(irq);
3466 	if (rid == 0) {
3467 		/* Make sure that INTx is enabled */
3468 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3469 	} else {
3470 		/*
3471 		 * Check to see if the interrupt is MSI or MSI-X.
3472 		 * Ask our parent to map the MSI and give
3473 		 * us the address and data register values.
3474 		 * If we fail for some reason, teardown the
3475 		 * interrupt handler.
3476 		 */
3477 		dinfo = device_get_ivars(child);
3478 		if (dinfo->cfg.msi.msi_alloc > 0) {
3479 			if (dinfo->cfg.msi.msi_addr == 0) {
3480 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3481 			    ("MSI has handlers, but vectors not mapped"));
3482 				error = PCIB_MAP_MSI(device_get_parent(dev),
3483 				    child, rman_get_start(irq), &addr, &data);
3484 				if (error)
3485 					goto bad;
3486 				dinfo->cfg.msi.msi_addr = addr;
3487 				dinfo->cfg.msi.msi_data = data;
3488 			}
3489 			if (dinfo->cfg.msi.msi_handlers == 0)
3490 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
3491 				    dinfo->cfg.msi.msi_data);
3492 			dinfo->cfg.msi.msi_handlers++;
3493 		} else {
3494 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3495 			    ("No MSI or MSI-X interrupts allocated"));
3496 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3497 			    ("MSI-X index too high"));
3498 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3499 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3500 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3501 			KASSERT(mv->mv_irq == rman_get_start(irq),
3502 			    ("IRQ mismatch"));
3503 			if (mv->mv_address == 0) {
3504 				KASSERT(mte->mte_handlers == 0,
3505 		    ("MSI-X table entry has handlers, but vector not mapped"));
3506 				error = PCIB_MAP_MSI(device_get_parent(dev),
3507 				    child, rman_get_start(irq), &addr, &data);
3508 				if (error)
3509 					goto bad;
3510 				mv->mv_address = addr;
3511 				mv->mv_data = data;
3512 			}
3513 			if (mte->mte_handlers == 0) {
3514 				pci_enable_msix(child, rid - 1, mv->mv_address,
3515 				    mv->mv_data);
3516 				pci_unmask_msix(child, rid - 1);
3517 			}
3518 			mte->mte_handlers++;
3519 		}
3520 
3521 		/* Make sure that INTx is disabled if we are using MSI/MSI-X. */
3522 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3523 	bad:
3524 		if (error) {
3525 			(void)bus_generic_teardown_intr(dev, child, irq,
3526 			    cookie);
3527 			return (error);
3528 		}
3529 	}
3530 	*cookiep = cookie;
3531 	return (0);
3532 }
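
/*
 * Illustrative sketch, hypothetical driver code: this method is
 * reached through bus_setup_intr() on an IRQ resource allocated
 * earlier, e.g.:
 *
 *	void *cookie;
 *	int error;
 *
 *	error = bus_setup_intr(dev, irq, INTR_TYPE_NET | INTR_MPSAFE,
 *	    NULL, foo_intr, sc, &cookie);
 *
 * For the rid-0 resource this unmasks INTx; for MSI/MSI-X rids it
 * programs and unmasks the corresponding message as shown above.
 */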
3533 
3534 int
3535 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3536     void *cookie)
3537 {
3538 	struct msix_table_entry *mte;
3539 	struct resource_list_entry *rle;
3540 	struct pci_devinfo *dinfo;
3541 	int error, rid;
3542 
3543 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3544 		return (EINVAL);
3545 
3546 	/* If this isn't a direct child, just bail out */
3547 	if (device_get_parent(child) != dev)
3548 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3549 
3550 	rid = rman_get_rid(irq);
3551 	if (rid == 0) {
3552 		/* Mask INTx */
3553 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3554 	} else {
3555 		/*
3556 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3557 		 * decrement the appropriate handlers count and mask the
3558 		 * MSI-X message, or disable MSI messages if the count
3559 		 * drops to 0.
3560 		 */
3561 		dinfo = device_get_ivars(child);
3562 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3563 		if (rle->res != irq)
3564 			return (EINVAL);
3565 		if (dinfo->cfg.msi.msi_alloc > 0) {
3566 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3567 			    ("MSI index too high"));
3568 			if (dinfo->cfg.msi.msi_handlers == 0)
3569 				return (EINVAL);
3570 			dinfo->cfg.msi.msi_handlers--;
3571 			if (dinfo->cfg.msi.msi_handlers == 0)
3572 				pci_disable_msi(child);
3573 		} else {
3574 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3575 			    ("No MSI or MSI-X interrupts allocated"));
3576 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3577 			    ("MSI-X index too high"));
3578 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3579 			if (mte->mte_handlers == 0)
3580 				return (EINVAL);
3581 			mte->mte_handlers--;
3582 			if (mte->mte_handlers == 0)
3583 				pci_mask_msix(child, rid - 1);
3584 		}
3585 	}
3586 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3587 	if (rid > 0)
3588 		KASSERT(error == 0,
3589 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3590 	return (error);
3591 }
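
/*
 * The inverse path: bus_teardown_intr() lands here for direct
 * children.  A driver detach routine would typically do (same
 * hypothetical names as in the setup sketch above):
 *
 *	bus_teardown_intr(dev, irq, sc->intrhand);
 *	bus_release_resource(dev, SYS_RES_IRQ, rid, irq);
 *	pci_release_msi(dev);
 */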
3592 
3593 int
3594 pci_print_child(device_t dev, device_t child)
3595 {
3596 	struct pci_devinfo *dinfo;
3597 	struct resource_list *rl;
3598 	int retval = 0;
3599 
3600 	dinfo = device_get_ivars(child);
3601 	rl = &dinfo->resources;
3602 
3603 	retval += bus_print_child_header(dev, child);
3604 
3605 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3606 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3607 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3608 	if (device_get_flags(dev))
3609 		retval += printf(" flags %#x", device_get_flags(dev));
3610 
3611 	retval += printf(" at device %d.%d", pci_get_slot(child),
3612 	    pci_get_function(child));
3613 
3614 	retval += bus_print_child_footer(dev, child);
3615 
3616 	return (retval);
3617 }
3618 
3619 static struct
3620 {
3621 	int	class;
3622 	int	subclass;
3623 	const char	*desc;
3624 } pci_nomatch_tab[] = {
3625 	{PCIC_OLD,		-1,			"old"},
3626 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3627 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3628 	{PCIC_STORAGE,		-1,			"mass storage"},
3629 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3630 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3631 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3632 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3633 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3634 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3635 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3636 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3637 	{PCIC_NETWORK,		-1,			"network"},
3638 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3639 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3640 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3641 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3642 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3643 	{PCIC_DISPLAY,		-1,			"display"},
3644 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3645 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3646 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3647 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3648 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3649 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3650 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3651 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3652 	{PCIC_MEMORY,		-1,			"memory"},
3653 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3654 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3655 	{PCIC_BRIDGE,		-1,			"bridge"},
3656 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3657 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3658 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3659 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3660 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3661 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3662 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3663 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3664 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3665 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3666 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3667 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3668 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3669 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3670 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3671 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3672 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3673 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3674 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3675 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3676 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3677 	{PCIC_INPUTDEV,		-1,			"input device"},
3678 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3679 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3680 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3681 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3682 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3683 	{PCIC_DOCKING,		-1,			"docking station"},
3684 	{PCIC_PROCESSOR,	-1,			"processor"},
3685 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3686 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3687 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3688 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3689 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3690 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3691 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3692 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3693 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3694 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3695 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3696 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3697 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3698 	{PCIC_SATCOM,		-1,			"satellite communication"},
3699 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3700 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3701 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3702 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3703 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3704 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3705 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3706 	{PCIC_DASP,		-1,			"dasp"},
3707 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3708 	{0, 0,		NULL}
3709 };
3710 
3711 void
3712 pci_probe_nomatch(device_t dev, device_t child)
3713 {
3714 	int	i;
3715 	const char	*cp, *scp;
	char	*device;
3716 
3717 	/*
3718 	 * Look for a listing for this device in a loaded device database.
3719 	 */
3720 	if ((device = pci_describe_device(child)) != NULL) {
3721 		device_printf(dev, "<%s>", device);
3722 		free(device, M_DEVBUF);
3723 	} else {
3724 		/*
3725 		 * Scan the class/subclass descriptions for a general
3726 		 * description.
3727 		 */
3728 		cp = "unknown";
3729 		scp = NULL;
3730 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3731 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3732 				if (pci_nomatch_tab[i].subclass == -1) {
3733 					cp = pci_nomatch_tab[i].desc;
3734 				} else if (pci_nomatch_tab[i].subclass ==
3735 				    pci_get_subclass(child)) {
3736 					scp = pci_nomatch_tab[i].desc;
3737 				}
3738 			}
3739 		}
3740 		device_printf(dev, "<%s%s%s>",
3741 		    cp ? cp : "",
3742 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3743 		    scp ? scp : "");
3744 	}
3745 	printf(" at device %d.%d (no driver attached)\n",
3746 	    pci_get_slot(child), pci_get_function(child));
3747 	pci_cfg_save(child, device_get_ivars(child), 1);
3749 }
3750 
3751 /*
3752  * Parse the PCI device database, if loaded, and return a pointer to a
3753  * description of the device.
3754  *
3755  * The database is flat text formatted as follows:
3756  *
3757  * Any line not in a valid format is ignored.
3758  * Lines are terminated with newline '\n' characters.
3759  *
3760  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3761  * the vendor name.
3762  *
3763  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3764  * - devices cannot be listed without a corresponding VENDOR line.
3765  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3766  * another TAB, then the device name.
3767  */
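
/*
 * For illustration, a fragment of such a database might look like this
 * (with <TAB> standing in for a literal tab character; the vendor and
 * device shown are merely examples):
 *
 *	8086<TAB>Intel Corporation
 *	<TAB>1229<TAB>82557/8/9 EtherExpress Pro/100 Ethernet
 */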
3768 
3769 /*
3770  * Assuming (ptr) points to the beginning of a line in the database,
3771  * return the vendor or device and description of the next entry.
3772  * The value of (vendor) or (device) inappropriate for the entry type
3773  * is set to -1.  Returns nonzero at the end of the database.
3774  *
3775  * Note that this is not entirely robust in the face of corrupt data;
3776  * we attempt to safeguard against it by appending a newline to the
3777  * end of the database when we initialise it.
3778  */
3779 static int
3780 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3781 {
3782 	char	*cp = *ptr;
3783 	int	left;
3784 
3785 	*device = -1;
3786 	*vendor = -1;
3787 	**desc = '\0';
3788 	for (;;) {
3789 		left = pci_vendordata_size - (cp - pci_vendordata);
3790 		if (left <= 0) {
3791 			*ptr = cp;
3792 			return (1);
3793 		}
3794 
3795 		/* vendor entry? */
3796 		if (*cp != '\t' &&
3797 		    sscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
3798 			break;
3799 		/* device entry? */
3800 		if (*cp == '\t' &&
3801 		    sscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
3802 			break;
3803 
3804 		/* skip to next line */
3805 		while (*cp != '\n' && left > 0) {
3806 			cp++;
3807 			left--;
3808 		}
3809 		if (*cp == '\n') {
3810 			cp++;
3811 			left--;
3812 		}
3813 	}
3814 	/* skip to next line */
3815 	while (*cp != '\n' && left > 0) {
3816 		cp++;
3817 		left--;
3818 	}
3819 	if (*cp == '\n' && left > 0)
3820 		cp++;
3821 	*ptr = cp;
3822 	return (0);
3823 }
3824 
3825 static char *
3826 pci_describe_device(device_t dev)
3827 {
3828 	int	vendor, device;
3829 	char	*desc, *vp, *dp, *line;
3830 
3831 	desc = vp = dp = NULL;
3832 
3833 	/*
3834 	 * If we have no vendor data, we can't do anything.
3835 	 */
3836 	if (pci_vendordata == NULL)
3837 		goto out;
3838 
3839 	/*
3840 	 * Scan the vendor data looking for this device
3841 	 */
3842 	line = pci_vendordata;
3843 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3844 		goto out;
3845 	for (;;) {
3846 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3847 			goto out;
3848 		if (vendor == pci_get_vendor(dev))
3849 			break;
3850 	}
3851 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3852 		goto out;
3853 	for (;;) {
3854 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3855 			*dp = 0;
3856 			break;
3857 		}
3858 		if (vendor != -1) {
3859 			*dp = 0;
3860 			break;
3861 		}
3862 		if (device == pci_get_device(dev))
3863 			break;
3864 	}
3865 	if (dp[0] == '\0')
3866 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3867 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3868 	    NULL)
3869 		sprintf(desc, "%s, %s", vp, dp);
3870  out:
3871 	if (vp != NULL)
3872 		free(vp, M_DEVBUF);
3873 	if (dp != NULL)
3874 		free(dp, M_DEVBUF);
3875 	return (desc);
3876 }
3877 
3878 int
3879 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3880 {
3881 	struct pci_devinfo *dinfo;
3882 	pcicfgregs *cfg;
3883 
3884 	dinfo = device_get_ivars(child);
3885 	cfg = &dinfo->cfg;
3886 
3887 	switch (which) {
3888 	case PCI_IVAR_ETHADDR:
3889 		/*
3890 		 * The generic accessor doesn't deal with failure, so
3891 		 * we set the return value, then return an error.
3892 		 */
3893 		*((uint8_t **) result) = NULL;
3894 		return (EINVAL);
3895 	case PCI_IVAR_SUBVENDOR:
3896 		*result = cfg->subvendor;
3897 		break;
3898 	case PCI_IVAR_SUBDEVICE:
3899 		*result = cfg->subdevice;
3900 		break;
3901 	case PCI_IVAR_VENDOR:
3902 		*result = cfg->vendor;
3903 		break;
3904 	case PCI_IVAR_DEVICE:
3905 		*result = cfg->device;
3906 		break;
3907 	case PCI_IVAR_DEVID:
3908 		*result = (cfg->device << 16) | cfg->vendor;
3909 		break;
3910 	case PCI_IVAR_CLASS:
3911 		*result = cfg->baseclass;
3912 		break;
3913 	case PCI_IVAR_SUBCLASS:
3914 		*result = cfg->subclass;
3915 		break;
3916 	case PCI_IVAR_PROGIF:
3917 		*result = cfg->progif;
3918 		break;
3919 	case PCI_IVAR_REVID:
3920 		*result = cfg->revid;
3921 		break;
3922 	case PCI_IVAR_INTPIN:
3923 		*result = cfg->intpin;
3924 		break;
3925 	case PCI_IVAR_IRQ:
3926 		*result = cfg->intline;
3927 		break;
3928 	case PCI_IVAR_DOMAIN:
3929 		*result = cfg->domain;
3930 		break;
3931 	case PCI_IVAR_BUS:
3932 		*result = cfg->bus;
3933 		break;
3934 	case PCI_IVAR_SLOT:
3935 		*result = cfg->slot;
3936 		break;
3937 	case PCI_IVAR_FUNCTION:
3938 		*result = cfg->func;
3939 		break;
3940 	case PCI_IVAR_CMDREG:
3941 		*result = cfg->cmdreg;
3942 		break;
3943 	case PCI_IVAR_CACHELNSZ:
3944 		*result = cfg->cachelnsz;
3945 		break;
3946 	case PCI_IVAR_MINGNT:
3947 		*result = cfg->mingnt;
3948 		break;
3949 	case PCI_IVAR_MAXLAT:
3950 		*result = cfg->maxlat;
3951 		break;
3952 	case PCI_IVAR_LATTIMER:
3953 		*result = cfg->lattimer;
3954 		break;
3955 	default:
3956 		return (ENOENT);
3957 	}
3958 	return (0);
3959 }
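
/*
 * Drivers normally read these ivars through the pci_get_*() accessors
 * from <dev/pci/pcivar.h> rather than calling this method directly.
 * For example, a probe routine might match on (illustrative IDs):
 *
 *	if (pci_get_vendor(dev) == 0x8086 && pci_get_device(dev) == 0x1229)
 *		return (BUS_PROBE_DEFAULT);
 */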
3960 
3961 int
3962 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3963 {
3964 	struct pci_devinfo *dinfo;
3965 
3966 	dinfo = device_get_ivars(child);
3967 
3968 	switch (which) {
3969 	case PCI_IVAR_INTPIN:
3970 		dinfo->cfg.intpin = value;
3971 		return (0);
3972 	case PCI_IVAR_ETHADDR:
3973 	case PCI_IVAR_SUBVENDOR:
3974 	case PCI_IVAR_SUBDEVICE:
3975 	case PCI_IVAR_VENDOR:
3976 	case PCI_IVAR_DEVICE:
3977 	case PCI_IVAR_DEVID:
3978 	case PCI_IVAR_CLASS:
3979 	case PCI_IVAR_SUBCLASS:
3980 	case PCI_IVAR_PROGIF:
3981 	case PCI_IVAR_REVID:
3982 	case PCI_IVAR_IRQ:
3983 	case PCI_IVAR_DOMAIN:
3984 	case PCI_IVAR_BUS:
3985 	case PCI_IVAR_SLOT:
3986 	case PCI_IVAR_FUNCTION:
3987 		return (EINVAL);	/* disallow for now */
3988 
3989 	default:
3990 		return (ENOENT);
3991 	}
3992 }
3993 
3994 #include "opt_ddb.h"
3995 #ifdef DDB
3996 #include <ddb/ddb.h>
3997 #include <sys/cons.h>
3998 
3999 /*
4000  * List resources based on PCI map registers, for use from within ddb.
4001  */
4002 
4003 DB_SHOW_COMMAND(pciregs, db_pci_dump)
4004 {
4005 	struct pci_devinfo *dinfo;
4006 	struct devlist *devlist_head;
4007 	struct pci_conf *p;
4008 	const char *name;
4009 	int i, error, none_count;
4010 
4011 	none_count = 0;
4012 	/* get the head of the device queue */
4013 	devlist_head = &pci_devq;
4014 
4015 	/*
4016 	 * Go through the list of devices and print out devices
4017 	 */
4018 	for (error = 0, i = 0,
4019 	     dinfo = STAILQ_FIRST(devlist_head);
4020 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4021 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4022 
4023 		/* Determine the name and unit of any attached driver. */
4024 		name = NULL;
4025 		if (dinfo->cfg.dev)
4026 			name = device_get_name(dinfo->cfg.dev);
4027 
4028 		p = &dinfo->conf;
4029 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4030 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4031 			(name && *name) ? name : "none",
4032 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4033 			none_count++,
4034 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4035 			p->pc_sel.pc_func, (p->pc_class << 16) |
4036 			(p->pc_subclass << 8) | p->pc_progif,
4037 			(p->pc_subdevice << 16) | p->pc_subvendor,
4038 			(p->pc_device << 16) | p->pc_vendor,
4039 			p->pc_revid, p->pc_hdr);
4040 	}
4041 }
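
/*
 * Usage note: on a DDB-enabled kernel the command registered above is
 * run from the debugger prompt as:
 *
 *	db> show pciregs
 *
 * The output essentially mirrors pciconf(8) list mode; the exact
 * contents depend on the devices present.
 */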
4042 #endif /* DDB */
4043 
4044 static struct resource *
4045 pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4046     u_long start, u_long end, u_long count, u_int flags)
4047 {
4048 	struct pci_devinfo *dinfo = device_get_ivars(child);
4049 	struct resource_list *rl = &dinfo->resources;
4050 	struct resource_list_entry *rle;
4051 	struct resource *res;
4052 	struct pci_map *pm;
4053 	pci_addr_t map, testval;
4054 	int mapsize;
4055 
4056 	res = NULL;
4057 	pm = pci_find_bar(child, *rid);
4058 	if (pm != NULL) {
4059 		/* This is a BAR that we failed to allocate earlier. */
4060 		mapsize = pm->pm_size;
4061 		map = pm->pm_value;
4062 	} else {
4063 		/*
4064 		 * Weed out the bogons, and figure out how large the
4065 		 * BAR/map is.  BARs that read back 0 here are bogus
4066 		 * and unimplemented.  Note: atapci devices in legacy
4067 		 * mode are special and handled elsewhere in the code.
4068 		 * If you have an atapci device in legacy mode and it fails
4069 		 * here, that other code is broken.
4070 		 */
4071 		pci_read_bar(child, *rid, &map, &testval);
4072 
4073 		/*
4074 		 * Determine the size of the BAR and ignore BARs with a size
4075 		 * of 0.  Device ROM BARs use a different mask value.
4076 		 */
4077 		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4078 			mapsize = pci_romsize(testval);
4079 		else
4080 			mapsize = pci_mapsize(testval);
4081 		if (mapsize == 0)
4082 			goto out;
4083 		pm = pci_add_bar(child, *rid, map, mapsize);
4084 	}
4085 
4086 	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4087 		if (type != SYS_RES_MEMORY) {
4088 			if (bootverbose)
4089 				device_printf(dev,
4090 				    "child %s requested type %d for rid %#x,"
4091 				    " but the BAR says it is a memio\n",
4092 				    device_get_nameunit(child), type, *rid);
4093 			goto out;
4094 		}
4095 	} else {
4096 		if (type != SYS_RES_IOPORT) {
4097 			if (bootverbose)
4098 				device_printf(dev,
4099 				    "child %s requested type %d for rid %#x,"
4100 				    " but the BAR says it is an ioport\n",
4101 				    device_get_nameunit(child), type, *rid);
4102 			goto out;
4103 		}
4104 	}
4105 
4106 	/*
4107 	 * For real BARs, we need to override the size that
4108 	 * the driver requests, because that's what the BAR
4109 	 * actually uses and we would otherwise have a
4110 	 * situation where we might allocate the excess to
4111 	 * another driver, which won't work.
4112 	 */
4113 	count = (pci_addr_t)1 << mapsize;
4114 	if (RF_ALIGNMENT(flags) < mapsize)
4115 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4116 	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4117 		flags |= RF_PREFETCHABLE;
4118 
4119 	/*
4120 	 * Allocate enough resource, and then write back the
4121 	 * appropriate BAR for that resource.
4122 	 */
4123 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4124 	    start, end, count, flags & ~RF_ACTIVE);
4125 	if (res == NULL) {
4126 		device_printf(child,
4127 		    "failed to allocate %#lx bytes for rid %#x type %d (%#lx-%#lx)\n",
4128 		    count, *rid, type, start, end);
4129 		goto out;
4130 	}
4131 	resource_list_add(rl, type, *rid, start, end, count);
4132 	rle = resource_list_find(rl, type, *rid);
4133 	if (rle == NULL)
4134 		panic("pci_reserve_map: unexpectedly can't find resource.");
4135 	rle->res = res;
4136 	rle->start = rman_get_start(res);
4137 	rle->end = rman_get_end(res);
4138 	rle->count = count;
4139 	rle->flags = RLE_RESERVED;
4140 	if (bootverbose)
4141 		device_printf(child,
4142 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4143 		    count, *rid, type, rman_get_start(res));
4144 	map = rman_get_start(res);
4145 	pci_write_bar(child, pm, map);
4146 out:
4147 	return (res);
4148 }
4149 
4150 struct resource *
4151 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4152 		   u_long start, u_long end, u_long count, u_int flags)
4153 {
4154 	struct pci_devinfo *dinfo = device_get_ivars(child);
4155 	struct resource_list *rl = &dinfo->resources;
4156 	struct resource_list_entry *rle;
4157 	struct resource *res;
4158 	pcicfgregs *cfg = &dinfo->cfg;
4159 
4160 	if (device_get_parent(child) != dev)
4161 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4162 		    type, rid, start, end, count, flags));
4163 
4164 	/*
4165 	 * Perform lazy resource allocation
4166 	 */
4167 	switch (type) {
4168 	case SYS_RES_IRQ:
4169 		/*
4170 		 * Can't alloc legacy interrupt once MSI messages have
4171 		 * been allocated.
4172 		 */
4173 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4174 		    cfg->msix.msix_alloc > 0))
4175 			return (NULL);
4176 
4177 		/*
4178 		 * If the child device doesn't have an interrupt
4179 		 * routed and is deserving of an interrupt, try to
4180 		 * assign it one.
4181 		 */
4182 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4183 		    (cfg->intpin != 0))
4184 			pci_assign_interrupt(dev, child, 0);
4185 		break;
4186 	case SYS_RES_IOPORT:
4187 	case SYS_RES_MEMORY:
4188 #ifdef NEW_PCIB
4189 		/*
4190 		 * PCI-PCI bridge I/O window resources are not BARs.
4191 		 * For those allocations just pass the request up the
4192 		 * tree.
4193 		 */
4194 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4195 			switch (*rid) {
4196 			case PCIR_IOBASEL_1:
4197 			case PCIR_MEMBASE_1:
4198 			case PCIR_PMBASEL_1:
4199 				/*
4200 				 * XXX: Should we bother creating a resource
4201 				 * list entry?
4202 				 */
4203 				return (bus_generic_alloc_resource(dev, child,
4204 				    type, rid, start, end, count, flags));
4205 			}
4206 		}
4207 #endif
4208 		/* Reserve resources for this BAR if needed. */
4209 		rle = resource_list_find(rl, type, *rid);
4210 		if (rle == NULL) {
4211 			res = pci_reserve_map(dev, child, type, rid, start, end,
4212 			    count, flags);
4213 			if (res == NULL)
4214 				return (NULL);
4215 		}
4216 	}
4217 	return (resource_list_alloc(rl, dev, child, type, rid,
4218 	    start, end, count, flags));
4219 }
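
/*
 * From a driver's point of view, the lazy reservation above is what
 * backs an ordinary BAR allocation in attach().  A minimal sketch
 * (hypothetical driver, assuming a memory BAR at PCIR_BAR(0)):
 *
 *	struct resource *res;
 *	int rid;
 *
 *	rid = PCIR_BAR(0);
 *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 *	if (res == NULL)
 *		return (ENXIO);
 */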
4220 
4221 int
4222 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4223     struct resource *r)
4224 {
4225 	struct pci_devinfo *dinfo;
4226 	int error;
4227 
4228 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4229 	if (error)
4230 		return (error);
4231 
4232 	/* Enable decoding in the command register when activating BARs. */
4233 	if (device_get_parent(child) == dev) {
4234 		/* Device ROMs need their decoding explicitly enabled. */
4235 		dinfo = device_get_ivars(child);
4236 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4237 			pci_write_bar(child, pci_find_bar(child, rid),
4238 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4239 		switch (type) {
4240 		case SYS_RES_IOPORT:
4241 		case SYS_RES_MEMORY:
4242 			error = PCI_ENABLE_IO(dev, child, type);
4243 			break;
4244 		}
4245 	}
4246 	return (error);
4247 }
4248 
4249 int
4250 pci_deactivate_resource(device_t dev, device_t child, int type,
4251     int rid, struct resource *r)
4252 {
4253 	struct pci_devinfo *dinfo;
4254 	int error;
4255 
4256 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4257 	if (error)
4258 		return (error);
4259 
4260 	/* Disable decoding for device ROMs. */
4261 	if (device_get_parent(child) == dev) {
4262 		dinfo = device_get_ivars(child);
4263 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4264 			pci_write_bar(child, pci_find_bar(child, rid),
4265 			    rman_get_start(r));
4266 	}
4267 	return (0);
4268 }
4269 
4270 void
4271 pci_delete_child(device_t dev, device_t child)
4272 {
4273 	struct resource_list_entry *rle;
4274 	struct resource_list *rl;
4275 	struct pci_devinfo *dinfo;
4276 
4277 	dinfo = device_get_ivars(child);
4278 	rl = &dinfo->resources;
4279 
4280 	if (device_is_attached(child))
4281 		device_detach(child);
4282 
4283 	/* Turn off access to resources we're about to free */
4284 	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
4285 	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
4286 
4287 	/* Free all allocated resources */
4288 	STAILQ_FOREACH(rle, rl, link) {
4289 		if (rle->res) {
4290 			if (rman_get_flags(rle->res) & RF_ACTIVE ||
4291 			    resource_list_busy(rl, rle->type, rle->rid)) {
4292 				pci_printf(&dinfo->cfg,
4293 				    "Resource still owned, oops. "
4294 				    "(type=%d, rid=%d, addr=%lx)\n",
4295 				    rle->type, rle->rid,
4296 				    rman_get_start(rle->res));
4297 				bus_release_resource(child, rle->type, rle->rid,
4298 				    rle->res);
4299 			}
4300 			resource_list_unreserve(rl, dev, child, rle->type,
4301 			    rle->rid);
4302 		}
4303 	}
4304 	resource_list_free(rl);
4305 
4306 	device_delete_child(dev, child);
4307 	pci_freecfg(dinfo);
4308 }
4309 
4310 void
4311 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4312 {
4313 	struct pci_devinfo *dinfo;
4314 	struct resource_list *rl;
4315 	struct resource_list_entry *rle;
4316 
4317 	if (device_get_parent(child) != dev)
4318 		return;
4319 
4320 	dinfo = device_get_ivars(child);
4321 	rl = &dinfo->resources;
4322 	rle = resource_list_find(rl, type, rid);
4323 	if (rle == NULL)
4324 		return;
4325 
4326 	if (rle->res) {
4327 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4328 		    resource_list_busy(rl, type, rid)) {
4329 			device_printf(dev, "delete_resource: "
4330 			    "Resource still owned by child, oops. "
4331 			    "(type=%d, rid=%d, addr=%lx)\n",
4332 			    type, rid, rman_get_start(rle->res));
4333 			return;
4334 		}
4335 
4336 #ifndef __PCI_BAR_ZERO_VALID
4337 		/*
4338 		 * If this is a BAR, clear the BAR so it stops
4339 		 * decoding before releasing the resource.
4340 		 */
4341 		switch (type) {
4342 		case SYS_RES_IOPORT:
4343 		case SYS_RES_MEMORY:
4344 			pci_write_bar(child, pci_find_bar(child, rid), 0);
4345 			break;
4346 		}
4347 #endif
4348 		resource_list_unreserve(rl, dev, child, type, rid);
4349 	}
4350 	resource_list_delete(rl, type, rid);
4351 }
4352 
4353 struct resource_list *
4354 pci_get_resource_list(device_t dev, device_t child)
4355 {
4356 	struct pci_devinfo *dinfo = device_get_ivars(child);
4357 
4358 	return (&dinfo->resources);
4359 }
4360 
4361 bus_dma_tag_t
4362 pci_get_dma_tag(device_t bus, device_t dev)
4363 {
4364 	struct pci_softc *sc = device_get_softc(bus);
4365 
4366 	return (sc->sc_dma_tag);
4367 }
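
/*
 * Drivers obtain this tag via bus_get_dma_tag(dev) and normally use it
 * as the parent when creating their own tags, e.g. (illustrative
 * parameters):
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    BUS_SPACE_MAXSIZE_32BIT, 1, BUS_SPACE_MAXSIZE_32BIT, 0,
 *	    NULL, NULL, &sc->dmat);
 */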
4368 
4369 uint32_t
4370 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4371 {
4372 	struct pci_devinfo *dinfo = device_get_ivars(child);
4373 	pcicfgregs *cfg = &dinfo->cfg;
4374 
4375 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4376 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4377 }
4378 
4379 void
4380 pci_write_config_method(device_t dev, device_t child, int reg,
4381     uint32_t val, int width)
4382 {
4383 	struct pci_devinfo *dinfo = device_get_ivars(child);
4384 	pcicfgregs *cfg = &dinfo->cfg;
4385 
4386 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4387 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4388 }
4389 
4390 int
4391 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4392     size_t buflen)
4393 {
4394 
4395 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4396 	    pci_get_function(child));
4397 	return (0);
4398 }
4399 
4400 int
4401 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4402     size_t buflen)
4403 {
4404 	struct pci_devinfo *dinfo;
4405 	pcicfgregs *cfg;
4406 
4407 	dinfo = device_get_ivars(child);
4408 	cfg = &dinfo->cfg;
4409 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4410 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4411 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4412 	    cfg->progif);
4413 	return (0);
4414 }
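
/*
 * The resulting string appears in devctl(4) NOMATCH events consumed by
 * devd(8).  For an illustrative (hypothetical) device it would read:
 *
 *	vendor=0x8086 device=0x1229 subvendor=0x8086 subdevice=0x0001
 *	class=0x020000
 */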
4415 
4416 int
4417 pci_assign_interrupt_method(device_t dev, device_t child)
4418 {
4419 	struct pci_devinfo *dinfo = device_get_ivars(child);
4420 	pcicfgregs *cfg = &dinfo->cfg;
4421 
4422 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4423 	    cfg->intpin));
4424 }
4425 
4426 static int
4427 pci_modevent(module_t mod, int what, void *arg)
4428 {
4429 	static struct cdev *pci_cdev;
4430 
4431 	switch (what) {
4432 	case MOD_LOAD:
4433 		STAILQ_INIT(&pci_devq);
4434 		pci_generation = 0;
4435 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4436 		    "pci");
4437 		pci_load_vendor_data();
4438 		break;
4439 
4440 	case MOD_UNLOAD:
4441 		destroy_dev(pci_cdev);
4442 		break;
4443 	}
4444 
4445 	return (0);
4446 }
4447 
4448 static void
4449 pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
4450 {
4451 #define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
4452 	struct pcicfg_pcie *cfg;
4453 	int version, pos;
4454 
4455 	cfg = &dinfo->cfg.pcie;
4456 	pos = cfg->pcie_location;
4457 
4458 	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4459 
4460 	WREG(PCIR_EXPRESS_DEVICE_CTL, cfg->pcie_device_ctl);
4461 
4462 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4463 	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4464 	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4465 		WREG(PCIR_EXPRESS_LINK_CTL, cfg->pcie_link_ctl);
4466 
4467 	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4468 	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4469 	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4470 		WREG(PCIR_EXPRESS_SLOT_CTL, cfg->pcie_slot_ctl);
4471 
4472 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4473 	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4474 		WREG(PCIR_EXPRESS_ROOT_CTL, cfg->pcie_root_ctl);
4475 
4476 	if (version > 1) {
4477 		WREG(PCIR_EXPRESS_DEVICE_CTL2, cfg->pcie_device_ctl2);
4478 		WREG(PCIR_EXPRESS_LINK_CTL2, cfg->pcie_link_ctl2);
4479 		WREG(PCIR_EXPRESS_SLOT_CTL2, cfg->pcie_slot_ctl2);
4480 	}
4481 #undef WREG
4482 }
4483 
4484 static void
4485 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4486 {
4487 	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4488 	    dinfo->cfg.pcix.pcix_command,  2);
4489 }
4490 
4491 void
4492 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4493 {
4494 
4495 	/*
4496 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4497 	 * which we know need special treatment.  Type 2 devices are
4498 	 * cardbus bridges which also require special treatment.
4499 	 * Other types are unknown, and we err on the side of safety
4500 	 * by ignoring them.
4501 	 */
4502 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4503 		return;
4504 
4505 	/*
4506 	 * Restore the device to full power mode.  We must do this
4507 	 * before we restore the registers because moving from D3 to
4508 	 * D0 will cause the chip's BARs and some other registers to
4509 	 * be reset to some unknown power on reset values.  Cut down
4510 	 * the noise on boot by doing nothing if we are already in
4511 	 * state D0.
4512 	 */
4513 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
4514 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4515 	pci_restore_bars(dev);
4516 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4517 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4518 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4519 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4520 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4521 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4522 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4523 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4524 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4525 
4526 	/*
4527 	 * Restore extended capabilities for PCI-Express and PCI-X
4528 	 */
4529 	if (dinfo->cfg.pcie.pcie_location != 0)
4530 		pci_cfg_restore_pcie(dev, dinfo);
4531 	if (dinfo->cfg.pcix.pcix_location != 0)
4532 		pci_cfg_restore_pcix(dev, dinfo);
4533 
4534 	/* Restore MSI and MSI-X configurations if they are present. */
4535 	if (dinfo->cfg.msi.msi_location != 0)
4536 		pci_resume_msi(dev);
4537 	if (dinfo->cfg.msix.msix_location != 0)
4538 		pci_resume_msix(dev);
4539 }
4540 
4541 static void
4542 pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
4543 {
4544 #define	RREG(n)	pci_read_config(dev, pos + (n), 2)
4545 	struct pcicfg_pcie *cfg;
4546 	int version, pos;
4547 
4548 	cfg = &dinfo->cfg.pcie;
4549 	pos = cfg->pcie_location;
4550 
4551 	cfg->pcie_flags = RREG(PCIR_EXPRESS_FLAGS);
4552 
4553 	version = cfg->pcie_flags & PCIM_EXP_FLAGS_VERSION;
4554 
4555 	cfg->pcie_device_ctl = RREG(PCIR_EXPRESS_DEVICE_CTL);
4556 
4557 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4558 	    cfg->pcie_type == PCIM_EXP_TYPE_ENDPOINT ||
4559 	    cfg->pcie_type == PCIM_EXP_TYPE_LEGACY_ENDPOINT)
4560 		cfg->pcie_link_ctl = RREG(PCIR_EXPRESS_LINK_CTL);
4561 
4562 	if (version > 1 || (cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4563 	    (cfg->pcie_type == PCIM_EXP_TYPE_DOWNSTREAM_PORT &&
4564 	     (cfg->pcie_flags & PCIM_EXP_FLAGS_SLOT))))
4565 		cfg->pcie_slot_ctl = RREG(PCIR_EXPRESS_SLOT_CTL);
4566 
4567 	if (version > 1 || cfg->pcie_type == PCIM_EXP_TYPE_ROOT_PORT ||
4568 	    cfg->pcie_type == PCIM_EXP_TYPE_ROOT_EC)
4569 		cfg->pcie_root_ctl = RREG(PCIR_EXPRESS_ROOT_CTL);
4570 
4571 	if (version > 1) {
4572 		cfg->pcie_device_ctl2 = RREG(PCIR_EXPRESS_DEVICE_CTL2);
4573 		cfg->pcie_link_ctl2 = RREG(PCIR_EXPRESS_LINK_CTL2);
4574 		cfg->pcie_slot_ctl2 = RREG(PCIR_EXPRESS_SLOT_CTL2);
4575 	}
4576 #undef RREG
4577 }
4578 
4579 static void
4580 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4581 {
4582 	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4583 	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4584 }
4585 
4586 void
4587 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4588 {
4589 	uint32_t cls;
4590 	int ps;
4591 
4592 	/*
4593 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4594 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4595 	 * which also require special treatment.  Other types are unknown, and
4596 	 * we err on the side of safety by ignoring them.  Powering down
4597 	 * bridges should not be undertaken lightly.
4598 	 */
4599 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4600 		return;
4601 
4602 	/*
4603 	 * Some drivers apparently write to these registers w/o updating our
4604 	 * cached copy.  No harm happens if we update the copy, so do so here
4605 	 * so we can restore them.  The COMMAND register is modified by the
4606 	 * bus w/o updating the cache.  This should represent the normally
4607 	 * writable portion of the 'defined' part of type 0 headers.  In
4608 	 * theory we also need to save/restore the PCI capability structures
4609 	 * we know about, but apart from power we don't know any that are
4610 	 * writable.
4611 	 */
4612 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4613 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4614 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4615 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4616 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4617 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4618 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4619 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4620 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4621 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4622 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4623 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4624 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4625 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4626 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4627 
4628 	if (dinfo->cfg.pcie.pcie_location != 0)
4629 		pci_cfg_save_pcie(dev, dinfo);
4630 
4631 	if (dinfo->cfg.pcix.pcix_location != 0)
4632 		pci_cfg_save_pcix(dev, dinfo);
4633 
4634 	/*
4635 	 * Don't set the state for display devices, base peripherals and
4636 	 * memory devices since bad things happen when they are powered down.
4637 	 * We should (a) have drivers that can easily detach and (b) use
4638 	 * generic drivers for these devices so that some device actually
4639 	 * attaches.  We need to make sure that when we implement (a) we don't
4640 	 * power the device down on a reattach.
4641 	 */
4642 	cls = pci_get_class(dev);
4643 	if (!setstate)
4644 		return;
4645 	switch (pci_do_power_nodriver) {
4646 	case 0:		/* NO powerdown at all */
4647 		return;
4648 	case 1:		/* Conservative about what to power down */
4649 		if (cls == PCIC_STORAGE)
4650 			return;
4651 		/* FALLTHROUGH */
4652 	case 2:		/* Aggressive about what to power down */
4653 		if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4654 		    cls == PCIC_BASEPERIPH)
4655 			return;
4656 		/* FALLTHROUGH */
4657 	case 3:		/* Power down everything */
4658 		break;
4659 	}
4661 	/*
4662 	 * PCI spec says we can only go into D3 state from D0 state.
4663 	 * Transition from D[12] into D0 before going to D3 state.
4664 	 */
4665 	ps = pci_get_powerstate(dev);
4666 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4667 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4668 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4669 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4670 }
4671 
4672 /* Wrapper APIs suitable for device driver use. */
4673 void
4674 pci_save_state(device_t dev)
4675 {
4676 	struct pci_devinfo *dinfo;
4677 
4678 	dinfo = device_get_ivars(dev);
4679 	pci_cfg_save(dev, dinfo, 0);
4680 }
4681 
4682 void
4683 pci_restore_state(device_t dev)
4684 {
4685 	struct pci_devinfo *dinfo;
4686 
4687 	dinfo = device_get_ivars(dev);
4688 	pci_cfg_restore(dev, dinfo);
4689 }
4690
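/*
 * A typical use of the wrappers above, sketched for a hypothetical
 * driver's suspend/resume methods:
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *
 *		pci_save_state(dev);
 *		return (bus_generic_suspend(dev));
 *	}
 *
 *	static int
 *	foo_resume(device_t dev)
 *	{
 *
 *		pci_restore_state(dev);
 *		return (bus_generic_resume(dev));
 *	}
 */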