xref: /freebsd/sys/dev/pci/pci.c (revision 682c9e0fed0115eb6f283e755901c0aac90e86e8)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
/*
 * True iff configuration register 'reg' is the expansion-ROM (BIOS) BAR
 * for the header type of 'cfg'.  Type-0 headers keep it at PCIR_BIOS,
 * type-1 (bridge) headers at PCIR_BIOS_1.  Both macro parameters are
 * parenthesized in the expansion so compound-expression arguments do
 * not fall victim to operator precedence.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
75 
76 
77 static pci_addr_t	pci_mapbase(uint64_t mapreg);
78 static const char	*pci_maptype(uint64_t mapreg);
79 static int		pci_mapsize(uint64_t testval);
80 static int		pci_maprange(uint64_t mapreg);
81 static pci_addr_t	pci_rombase(uint64_t mapreg);
82 static int		pci_romsize(uint64_t testval);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85 
86 static int		pci_porten(device_t dev);
87 static int		pci_memen(device_t dev);
88 static void		pci_assign_interrupt(device_t bus, device_t dev,
89 			    int force_route);
90 static int		pci_add_map(device_t bus, device_t dev, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_enable_msix(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix(device_t dev, u_int index);
115 static void		pci_unmask_msix(device_t dev, u_int index);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
/*
 * newbus method table for the PCI bus driver: maps the generic device,
 * bus, and PCI kobj interfaces onto this file's implementations (or the
 * bus_generic_* defaults where no PCI-specific behavior is needed).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
175 
176 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
177 
178 static devclass_t pci_devclass;
179 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
180 MODULE_VERSION(pci, 1);
181 
182 static char	*pci_vendordata;
183 static size_t	pci_vendordata_size;
184 
185 
/*
 * One entry in the device quirk table.  Entries are matched against a
 * device's combined vendor/device ID; the meaning of arg1/arg2 depends
 * on the quirk type (e.g. the register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-type-specific argument */
	int	arg2;	/* quirk-type-specific argument */
};
195 
/*
 * Known-broken (or known-good-in-VM) devices.  The devid field packs
 * the device ID in the high 16 bits and the vendor ID in the low 16.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* list terminator */
};
236 
237 /* map register information */
238 #define	PCI_MAPMEM	0x01	/* memory map */
239 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
240 #define	PCI_MAPPORT	0x04	/* port map */
241 
242 struct devlist pci_devq;
243 uint32_t pci_generation;
244 uint32_t pci_numdevs = 0;
245 static int pcie_chipset, pcix_chipset;
246 
247 /* sysctl vars */
248 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
249 
250 static int pci_enable_io_modes = 1;
251 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
252 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
253     &pci_enable_io_modes, 1,
254     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
255 enable these bits correctly.  We'd like to do this all the time, but there\n\
256 are some peripherals that this causes problems with.");
257 
258 static int pci_do_power_nodriver = 0;
259 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
260 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
261     &pci_do_power_nodriver, 0,
262   "Place a function into D3 state when no driver attaches to it.  0 means\n\
263 disable.  1 means conservatively place devices into D3 state.  2 means\n\
264 agressively place devices into D3 state.  3 means put absolutely everything\n\
265 in D3 state.");
266 
267 int pci_do_power_resume = 1;
268 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
269 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
270     &pci_do_power_resume, 1,
271   "Transition from D3 -> D0 on resume.");
272 
273 int pci_do_power_suspend = 1;
274 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
275 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
276     &pci_do_power_suspend, 1,
277   "Transition from D0 -> D3 on suspend.");
278 
279 static int pci_do_msi = 1;
280 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
281 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
282     "Enable support for MSI interrupts");
283 
284 static int pci_do_msix = 1;
285 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
287     "Enable support for MSI-X interrupts");
288 
289 static int pci_honor_msi_blacklist = 1;
290 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
291 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
292     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
293 
294 #if defined(__i386__) || defined(__amd64__)
295 static int pci_usb_takeover = 1;
296 #else
297 static int pci_usb_takeover = 0;
298 #endif
299 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
300 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
301     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
302 Disable this if you depend on BIOS emulation of USB devices, that is\n\
303 you use USB devices (like keyboard or mouse) but do not load USB drivers");
304 
305 /* Find a device_t by bus/slot/function in domain 0 */
306 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: same lookup restricted to PCI domain 0. */
	return (pci_find_dbsf(0, bus, slot, func));
}
313 
314 /* Find a device_t by domain/bus/slot/function */
315 
316 device_t
317 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
318 {
319 	struct pci_devinfo *dinfo;
320 
321 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322 		if ((dinfo->cfg.domain == domain) &&
323 		    (dinfo->cfg.bus == bus) &&
324 		    (dinfo->cfg.slot == slot) &&
325 		    (dinfo->cfg.func == func)) {
326 			return (dinfo->cfg.dev);
327 		}
328 	}
329 
330 	return (NULL);
331 }
332 
333 /* Find a device_t by vendor/device ID */
334 
335 device_t
336 pci_find_device(uint16_t vendor, uint16_t device)
337 {
338 	struct pci_devinfo *dinfo;
339 
340 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
341 		if ((dinfo->cfg.vendor == vendor) &&
342 		    (dinfo->cfg.device == device)) {
343 			return (dinfo->cfg.dev);
344 		}
345 	}
346 
347 	return (NULL);
348 }
349 
350 device_t
351 pci_find_class(uint8_t class, uint8_t subclass)
352 {
353 	struct pci_devinfo *dinfo;
354 
355 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
356 		if (dinfo->cfg.baseclass == class &&
357 		    dinfo->cfg.subclass == subclass) {
358 			return (dinfo->cfg.dev);
359 		}
360 	}
361 
362 	return (NULL);
363 }
364 
/*
 * printf() variant that prefixes the message with the device's
 * "pciD:B:S:F: " location taken from 'cfg'.  Returns the total number
 * of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
378 
379 /* return base address of memory or port map */
380 
381 static pci_addr_t
382 pci_mapbase(uint64_t mapreg)
383 {
384 
385 	if (PCI_BAR_MEM(mapreg))
386 		return (mapreg & PCIM_BAR_MEM_BASE);
387 	else
388 		return (mapreg & PCIM_BAR_IO_BASE);
389 }
390 
391 /* return map type of memory or port map */
392 
393 static const char *
394 pci_maptype(uint64_t mapreg)
395 {
396 
397 	if (PCI_BAR_IO(mapreg))
398 		return ("I/O Port");
399 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
400 		return ("Prefetchable Memory");
401 	return ("Memory");
402 }
403 
404 /* return log2 of map size decoded for memory or port map */
405 
406 static int
407 pci_mapsize(uint64_t testval)
408 {
409 	int ln2size;
410 
411 	testval = pci_mapbase(testval);
412 	ln2size = 0;
413 	if (testval != 0) {
414 		while ((testval & 1) == 0)
415 		{
416 			ln2size++;
417 			testval >>= 1;
418 		}
419 	}
420 	return (ln2size);
421 }
422 
423 /* return base address of device ROM */
424 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the BIOS BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
431 
/* return log2 of map size decoded for device ROM */
433 
434 static int
435 pci_romsize(uint64_t testval)
436 {
437 	int ln2size;
438 
439 	testval = pci_rombase(testval);
440 	ln2size = 0;
441 	if (testval != 0) {
442 		while ((testval & 1) == 0)
443 		{
444 			ln2size++;
445 			testval >>= 1;
446 		}
447 	}
448 	return (ln2size);
449 }
450 
451 /* return log2 of address range supported by map register */
452 
453 static int
454 pci_maprange(uint64_t mapreg)
455 {
456 	int ln2range = 0;
457 
458 	if (PCI_BAR_IO(mapreg))
459 		ln2range = 32;
460 	else
461 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
462 		case PCIM_BAR_MEM_32:
463 			ln2range = 32;
464 			break;
465 		case PCIM_BAR_MEM_1MB:
466 			ln2range = 20;
467 			break;
468 		case PCIM_BAR_MEM_64:
469 			ln2range = 64;
470 			break;
471 		}
472 	return (ln2range);
473 }
474 
475 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
476 
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 (normal) headers need fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
487 
488 /* extract header type specific config data */
489 
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The subvendor/subdevice register offsets and the number of
	 * BARs depend on the configuration header type.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges get subvendor/subdevice from a capability instead. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
511 
512 /* read configuration header into pcicfgregs structure */
/*
 * Probe the function at domain/bus/slot/function d/b/s/f through bridge
 * 'pcib'.  If a device responds, allocate a pci_devinfo of 'size' bytes
 * (callers may embed it in a larger structure), fill in its pcicfgregs
 * from the configuration header, parse capabilities, link it onto the
 * global device list, and return it.  Returns NULL if no device is
 * present at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device register means nothing responded. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NOTE(review): with M_WAITOK this cannot return NULL. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the standard configuration header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the identity into the pci_conf for pciconf(8) etc. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
587 
/*
 * Walk the device's standard capability list and record the location
 * and key registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, bridge subvendor, PCI-X, PCIe) in
 * 'cfg'.  Also sets the file-global pcix_chipset/pcie_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
745 
746 /*
747  * PCI Vital Product Data
748  */
749 
750 #define	PCI_VPD_TIMEOUT		1000000
751 
/*
 * Read one 4-byte-aligned 32-bit word of VPD data at offset 'reg'
 * through the VPD capability registers.  Returns 0 and stores the word
 * in *data on success, or ENXIO if the device does not set the
 * completion flag (bit 15 of the address register) within
 * PCI_VPD_TIMEOUT polls.  Relies on the REG()/WREG() macros left
 * defined at the end of pci_read_cap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Hardware sets bit 15 when the read has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
770 
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD data at offset 'reg'.
 * The write is started by setting bit 15 of the address register;
 * hardware clears it when the write completes.  Returns 0 on success
 * or ENXIO on timeout.  Currently compiled out (no in-tree callers).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
790 
791 #undef PCI_VPD_TIMEOUT
792 
/* Iterator state for reading VPD one byte at a time (see vpd_nextbyte). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config-space access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit word of VPD data */
	int		bytesinval;	/* bytes of 'val' not yet consumed */
	int		off;		/* offset of next word to fetch */
	uint8_t		cksum;		/* running byte sum for "RV" check */
};
801 
802 static int
803 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
804 {
805 	uint32_t reg;
806 	uint8_t byte;
807 
808 	if (vrs->bytesinval == 0) {
809 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
810 			return (ENXIO);
811 		vrs->val = le32toh(reg);
812 		vrs->off += 4;
813 		byte = vrs->val & 0xff;
814 		vrs->bytesinval = 3;
815 	} else {
816 		vrs->val = vrs->val >> 8;
817 		byte = vrs->val & 0xff;
818 		vrs->bytesinval--;
819 	}
820 
821 	vrs->cksum += byte;
822 	*data = byte;
823 	return (0);
824 }
825 
826 static void
827 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
828 {
829 	struct vpd_readstate vrs;
830 	int state;
831 	int name;
832 	int remain;
833 	int i;
834 	int alloc, off;		/* alloc/off for RO/W arrays */
835 	int cksumvalid;
836 	int dflen;
837 	uint8_t byte;
838 	uint8_t byte2;
839 
840 	/* init vpd reader */
841 	vrs.bytesinval = 0;
842 	vrs.off = 0;
843 	vrs.pcib = pcib;
844 	vrs.cfg = cfg;
845 	vrs.cksum = 0;
846 
847 	state = 0;
848 	name = remain = i = 0;	/* shut up stupid gcc */
849 	alloc = off = 0;	/* shut up stupid gcc */
850 	dflen = 0;		/* shut up stupid gcc */
851 	cksumvalid = -1;
852 	while (state >= 0) {
853 		if (vpd_nextbyte(&vrs, &byte)) {
854 			state = -2;
855 			break;
856 		}
857 #if 0
858 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
859 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
860 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
861 #endif
862 		switch (state) {
863 		case 0:		/* item name */
864 			if (byte & 0x80) {
865 				if (vpd_nextbyte(&vrs, &byte2)) {
866 					state = -2;
867 					break;
868 				}
869 				remain = byte2;
870 				if (vpd_nextbyte(&vrs, &byte2)) {
871 					state = -2;
872 					break;
873 				}
874 				remain |= byte2 << 8;
875 				if (remain > (0x7f*4 - vrs.off)) {
876 					state = -1;
877 					printf(
878 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
879 					    cfg->domain, cfg->bus, cfg->slot,
880 					    cfg->func, remain);
881 				}
882 				name = byte & 0x7f;
883 			} else {
884 				remain = byte & 0x7;
885 				name = (byte >> 3) & 0xf;
886 			}
887 			switch (name) {
888 			case 0x2:	/* String */
889 				cfg->vpd.vpd_ident = malloc(remain + 1,
890 				    M_DEVBUF, M_WAITOK);
891 				i = 0;
892 				state = 1;
893 				break;
894 			case 0xf:	/* End */
895 				state = -1;
896 				break;
897 			case 0x10:	/* VPD-R */
898 				alloc = 8;
899 				off = 0;
900 				cfg->vpd.vpd_ros = malloc(alloc *
901 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
902 				    M_WAITOK | M_ZERO);
903 				state = 2;
904 				break;
905 			case 0x11:	/* VPD-W */
906 				alloc = 8;
907 				off = 0;
908 				cfg->vpd.vpd_w = malloc(alloc *
909 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
910 				    M_WAITOK | M_ZERO);
911 				state = 5;
912 				break;
913 			default:	/* Invalid data, abort */
914 				state = -1;
915 				break;
916 			}
917 			break;
918 
919 		case 1:	/* Identifier String */
920 			cfg->vpd.vpd_ident[i++] = byte;
921 			remain--;
922 			if (remain == 0)  {
923 				cfg->vpd.vpd_ident[i] = '\0';
924 				state = 0;
925 			}
926 			break;
927 
928 		case 2:	/* VPD-R Keyword Header */
929 			if (off == alloc) {
930 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
931 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
932 				    M_DEVBUF, M_WAITOK | M_ZERO);
933 			}
934 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
935 			if (vpd_nextbyte(&vrs, &byte2)) {
936 				state = -2;
937 				break;
938 			}
939 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
940 			if (vpd_nextbyte(&vrs, &byte2)) {
941 				state = -2;
942 				break;
943 			}
944 			dflen = byte2;
945 			if (dflen == 0 &&
946 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
947 			    2) == 0) {
948 				/*
949 				 * if this happens, we can't trust the rest
950 				 * of the VPD.
951 				 */
952 				printf(
953 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
954 				    cfg->domain, cfg->bus, cfg->slot,
955 				    cfg->func, dflen);
956 				cksumvalid = 0;
957 				state = -1;
958 				break;
959 			} else if (dflen == 0) {
960 				cfg->vpd.vpd_ros[off].value = malloc(1 *
961 				    sizeof(*cfg->vpd.vpd_ros[off].value),
962 				    M_DEVBUF, M_WAITOK);
963 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
964 			} else
965 				cfg->vpd.vpd_ros[off].value = malloc(
966 				    (dflen + 1) *
967 				    sizeof(*cfg->vpd.vpd_ros[off].value),
968 				    M_DEVBUF, M_WAITOK);
969 			remain -= 3;
970 			i = 0;
971 			/* keep in sync w/ state 3's transistions */
972 			if (dflen == 0 && remain == 0)
973 				state = 0;
974 			else if (dflen == 0)
975 				state = 2;
976 			else
977 				state = 3;
978 			break;
979 
980 		case 3:	/* VPD-R Keyword Value */
981 			cfg->vpd.vpd_ros[off].value[i++] = byte;
982 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
983 			    "RV", 2) == 0 && cksumvalid == -1) {
984 				if (vrs.cksum == 0)
985 					cksumvalid = 1;
986 				else {
987 					if (bootverbose)
988 						printf(
989 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
990 						    cfg->domain, cfg->bus,
991 						    cfg->slot, cfg->func,
992 						    vrs.cksum);
993 					cksumvalid = 0;
994 					state = -1;
995 					break;
996 				}
997 			}
998 			dflen--;
999 			remain--;
1000 			/* keep in sync w/ state 2's transistions */
1001 			if (dflen == 0)
1002 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1003 			if (dflen == 0 && remain == 0) {
1004 				cfg->vpd.vpd_rocnt = off;
1005 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1006 				    off * sizeof(*cfg->vpd.vpd_ros),
1007 				    M_DEVBUF, M_WAITOK | M_ZERO);
1008 				state = 0;
1009 			} else if (dflen == 0)
1010 				state = 2;
1011 			break;
1012 
1013 		case 4:
1014 			remain--;
1015 			if (remain == 0)
1016 				state = 0;
1017 			break;
1018 
1019 		case 5:	/* VPD-W Keyword Header */
1020 			if (off == alloc) {
1021 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1022 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1023 				    M_DEVBUF, M_WAITOK | M_ZERO);
1024 			}
1025 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1026 			if (vpd_nextbyte(&vrs, &byte2)) {
1027 				state = -2;
1028 				break;
1029 			}
1030 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1031 			if (vpd_nextbyte(&vrs, &byte2)) {
1032 				state = -2;
1033 				break;
1034 			}
1035 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1036 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1037 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1038 			    sizeof(*cfg->vpd.vpd_w[off].value),
1039 			    M_DEVBUF, M_WAITOK);
1040 			remain -= 3;
1041 			i = 0;
1042 			/* keep in sync w/ state 6's transistions */
1043 			if (dflen == 0 && remain == 0)
1044 				state = 0;
1045 			else if (dflen == 0)
1046 				state = 5;
1047 			else
1048 				state = 6;
1049 			break;
1050 
1051 		case 6:	/* VPD-W Keyword Value */
1052 			cfg->vpd.vpd_w[off].value[i++] = byte;
1053 			dflen--;
1054 			remain--;
1055 			/* keep in sync w/ state 5's transistions */
1056 			if (dflen == 0)
1057 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1058 			if (dflen == 0 && remain == 0) {
1059 				cfg->vpd.vpd_wcnt = off;
1060 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1061 				    off * sizeof(*cfg->vpd.vpd_w),
1062 				    M_DEVBUF, M_WAITOK | M_ZERO);
1063 				state = 0;
1064 			} else if (dflen == 0)
1065 				state = 5;
1066 			break;
1067 
1068 		default:
1069 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1070 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1071 			    state);
1072 			state = -1;
1073 			break;
1074 		}
1075 	}
1076 
1077 	if (cksumvalid == 0 || state < -1) {
1078 		/* read-only data bad, clean up */
1079 		if (cfg->vpd.vpd_ros != NULL) {
1080 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1081 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1082 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1083 			cfg->vpd.vpd_ros = NULL;
1084 		}
1085 	}
1086 	if (state < -1) {
1087 		/* I/O error, clean up */
1088 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1089 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1090 		if (cfg->vpd.vpd_ident != NULL) {
1091 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1092 			cfg->vpd.vpd_ident = NULL;
1093 		}
1094 		if (cfg->vpd.vpd_w != NULL) {
1095 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1096 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1097 			free(cfg->vpd.vpd_w, M_DEVBUF);
1098 			cfg->vpd.vpd_w = NULL;
1099 		}
1100 	}
1101 	cfg->vpd.vpd_cached = 1;
1102 #undef REG
1103 #undef WREG
1104 }
1105 
1106 int
1107 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1108 {
1109 	struct pci_devinfo *dinfo = device_get_ivars(child);
1110 	pcicfgregs *cfg = &dinfo->cfg;
1111 
1112 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1113 		pci_read_vpd(device_get_parent(dev), cfg);
1114 
1115 	*identptr = cfg->vpd.vpd_ident;
1116 
1117 	if (*identptr == NULL)
1118 		return (ENXIO);
1119 
1120 	return (0);
1121 }
1122 
1123 int
1124 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1125 	const char **vptr)
1126 {
1127 	struct pci_devinfo *dinfo = device_get_ivars(child);
1128 	pcicfgregs *cfg = &dinfo->cfg;
1129 	int i;
1130 
1131 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1132 		pci_read_vpd(device_get_parent(dev), cfg);
1133 
1134 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1135 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1136 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1137 			*vptr = cfg->vpd.vpd_ros[i].value;
1138 		}
1139 
1140 	if (i != cfg->vpd.vpd_rocnt)
1141 		return (0);
1142 
1143 	*vptr = NULL;
1144 	return (ENXIO);
1145 }
1146 
1147 /*
1148  * Find the requested extended capability and return the offset in
1149  * configuration space via the pointer provided. The function returns
1150  * 0 on success and error code otherwise.
1151  */
1152 int
1153 pci_find_extcap_method(device_t dev, device_t child, int capability,
1154     int *capreg)
1155 {
1156 	struct pci_devinfo *dinfo = device_get_ivars(child);
1157 	pcicfgregs *cfg = &dinfo->cfg;
1158 	u_int32_t status;
1159 	u_int8_t ptr;
1160 
1161 	/*
1162 	 * Check the CAP_LIST bit of the PCI status register first.
1163 	 */
1164 	status = pci_read_config(child, PCIR_STATUS, 2);
1165 	if (!(status & PCIM_STATUS_CAPPRESENT))
1166 		return (ENXIO);
1167 
1168 	/*
1169 	 * Determine the start pointer of the capabilities list.
1170 	 */
1171 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1172 	case PCIM_HDRTYPE_NORMAL:
1173 	case PCIM_HDRTYPE_BRIDGE:
1174 		ptr = PCIR_CAP_PTR;
1175 		break;
1176 	case PCIM_HDRTYPE_CARDBUS:
1177 		ptr = PCIR_CAP_PTR_2;
1178 		break;
1179 	default:
1180 		/* XXX: panic? */
1181 		return (ENXIO);		/* no extended capabilities support */
1182 	}
1183 	ptr = pci_read_config(child, ptr, 1);
1184 
1185 	/*
1186 	 * Traverse the capabilities list.
1187 	 */
1188 	while (ptr != 0) {
1189 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1190 			if (capreg != NULL)
1191 				*capreg = ptr;
1192 			return (0);
1193 		}
1194 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1195 	}
1196 
1197 	return (ENOENT);
1198 }
1199 
1200 /*
1201  * Support for MSI-X message interrupts.
1202  */
1203 void
1204 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1205 {
1206 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1207 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1208 	uint32_t offset;
1209 
1210 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1211 	offset = msix->msix_table_offset + index * 16;
1212 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1213 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1214 	bus_write_4(msix->msix_table_res, offset + 8, data);
1215 
1216 	/* Enable MSI -> HT mapping. */
1217 	pci_ht_map_msi(dev, address);
1218 }
1219 
1220 void
1221 pci_mask_msix(device_t dev, u_int index)
1222 {
1223 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1224 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1225 	uint32_t offset, val;
1226 
1227 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1228 	offset = msix->msix_table_offset + index * 16 + 12;
1229 	val = bus_read_4(msix->msix_table_res, offset);
1230 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1231 		val |= PCIM_MSIX_VCTRL_MASK;
1232 		bus_write_4(msix->msix_table_res, offset, val);
1233 	}
1234 }
1235 
1236 void
1237 pci_unmask_msix(device_t dev, u_int index)
1238 {
1239 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1240 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1241 	uint32_t offset, val;
1242 
1243 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1244 	offset = msix->msix_table_offset + index * 16 + 12;
1245 	val = bus_read_4(msix->msix_table_res, offset);
1246 	if (val & PCIM_MSIX_VCTRL_MASK) {
1247 		val &= ~PCIM_MSIX_VCTRL_MASK;
1248 		bus_write_4(msix->msix_table_res, offset, val);
1249 	}
1250 }
1251 
1252 int
1253 pci_pending_msix(device_t dev, u_int index)
1254 {
1255 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1256 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1257 	uint32_t offset, bit;
1258 
1259 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1260 	offset = msix->msix_pba_offset + (index / 32) * 4;
1261 	bit = 1 << index % 32;
1262 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1263 }
1264 
1265 /*
1266  * Restore MSI-X registers and table during resume.  If MSI-X is
1267  * enabled then walk the virtual table to restore the actual MSI-X
1268  * table.
1269  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1297 
1298 /*
1299  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1300  * returned in *count.  After this function returns, each message will be
1301  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1302  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The MSI-X table
	 * and the pending-bit array may live in the same BAR or in two
	 * different ones.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail only if we could not allocate any at all. */
			if (i == 0)
				return (error);
			break;
		}
		/* Messages appear as SYS_RES_IRQ resources at rid i + 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'i' is the number of messages successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is a 1-based index into msix_vectors. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1437 
1438 /*
1439  * By default, pci_alloc_msix() will assign the allocated IRQ
1440  * resources consecutively to the first N messages in the MSI-X table.
1441  * However, device drivers may want to use different layouts if they
1442  * either receive fewer messages than they asked for, or they wish to
1443  * populate the MSI-X table sparsely.  This method allows the driver
1444  * to specify what layout it wants.  It must be called after a
1445  * successful pci_alloc_msix() but before any of the associated
1446  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1447  *
1448  * The 'vectors' array contains 'count' message vectors.  The array
1449  * maps directly to the MSI-X table in that index 0 in the array
1450  * specifies the vector for the first message in the MSI-X table, etc.
1451  * The vector value in each array index can either be 0 to indicate
1452  * that no vector should be assigned to a message slot, or it can be a
1453  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1455  * vector (IRQ) to be used for the corresponding message.
1456  *
1457  * On successful return, each message with a non-zero vector will have
1458  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1459  * 1.  Additionally, if any of the IRQs allocated via the previous
1460  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1461  * will be freed back to the system automatically.
1462  *
1463  * For example, suppose a driver has a MSI-X table with 6 messages and
1464  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1465  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1466  * C.  After the call to pci_alloc_msix(), the device will be setup to
1467  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1469  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1470  * be freed back to the system.  This device will also have valid
1471  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1472  *
1473  * In any case, the SYS_RES_IRQ rid X will always map to the message
1474  * at MSI-X table index X - 1 and will only be valid if a vector is
1475  * assigned to that table entry.
1476  */
1477 int
1478 pci_remap_msix_method(device_t dev, device_t child, int count,
1479     const u_int *vectors)
1480 {
1481 	struct pci_devinfo *dinfo = device_get_ivars(child);
1482 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1483 	struct resource_list_entry *rle;
1484 	int i, irq, j, *used;
1485 
1486 	/*
1487 	 * Have to have at least one message in the table but the
1488 	 * table can't be bigger than the actual MSI-X table in the
1489 	 * device.
1490 	 */
1491 	if (count == 0 || count > msix->msix_msgnum)
1492 		return (EINVAL);
1493 
1494 	/* Sanity check the vectors. */
1495 	for (i = 0; i < count; i++)
1496 		if (vectors[i] > msix->msix_alloc)
1497 			return (EINVAL);
1498 
1499 	/*
1500 	 * Make sure there aren't any holes in the vectors to be used.
1501 	 * It's a big pain to support it, and it doesn't really make
1502 	 * sense anyway.  Also, at least one vector must be used.
1503 	 */
1504 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1505 	    M_ZERO);
1506 	for (i = 0; i < count; i++)
1507 		if (vectors[i] != 0)
1508 			used[vectors[i] - 1] = 1;
1509 	for (i = 0; i < msix->msix_alloc - 1; i++)
1510 		if (used[i] == 0 && used[i + 1] == 1) {
1511 			free(used, M_DEVBUF);
1512 			return (EINVAL);
1513 		}
1514 	if (used[0] != 1) {
1515 		free(used, M_DEVBUF);
1516 		return (EINVAL);
1517 	}
1518 
1519 	/* Make sure none of the resources are allocated. */
1520 	for (i = 0; i < msix->msix_table_len; i++) {
1521 		if (msix->msix_table[i].mte_vector == 0)
1522 			continue;
1523 		if (msix->msix_table[i].mte_handlers > 0)
1524 			return (EBUSY);
1525 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1526 		KASSERT(rle != NULL, ("missing resource"));
1527 		if (rle->res != NULL)
1528 			return (EBUSY);
1529 	}
1530 
1531 	/* Free the existing resource list entries. */
1532 	for (i = 0; i < msix->msix_table_len; i++) {
1533 		if (msix->msix_table[i].mte_vector == 0)
1534 			continue;
1535 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1536 	}
1537 
1538 	/*
1539 	 * Build the new virtual table keeping track of which vectors are
1540 	 * used.
1541 	 */
1542 	free(msix->msix_table, M_DEVBUF);
1543 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1544 	    M_DEVBUF, M_WAITOK | M_ZERO);
1545 	for (i = 0; i < count; i++)
1546 		msix->msix_table[i].mte_vector = vectors[i];
1547 	msix->msix_table_len = count;
1548 
1549 	/* Free any unused IRQs and resize the vectors array if necessary. */
1550 	j = msix->msix_alloc - 1;
1551 	if (used[j] == 0) {
1552 		struct msix_vector *vec;
1553 
1554 		while (used[j] == 0) {
1555 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1556 			    msix->msix_vectors[j].mv_irq);
1557 			j--;
1558 		}
1559 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1560 		    M_WAITOK);
1561 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1562 		    (j + 1));
1563 		free(msix->msix_vectors, M_DEVBUF);
1564 		msix->msix_vectors = vec;
1565 		msix->msix_alloc = j + 1;
1566 	}
1567 	free(used, M_DEVBUF);
1568 
1569 	/* Map the IRQs onto the rids. */
1570 	for (i = 0; i < count; i++) {
1571 		if (vectors[i] == 0)
1572 			continue;
1573 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1574 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1575 		    irq, 1);
1576 	}
1577 
1578 	if (bootverbose) {
1579 		device_printf(child, "Remapped MSI-X IRQs as: ");
1580 		for (i = 0; i < count; i++) {
1581 			if (i != 0)
1582 				printf(", ");
1583 			if (vectors[i] == 0)
1584 				printf("---");
1585 			else
1586 				printf("%d",
1587 				    msix->msix_vectors[vectors[i]].mv_irq);
1588 		}
1589 		printf("\n");
1590 	}
1591 
1592 	return (0);
1593 }
1594 
/*
 * Disable MSI-X and release all messages and resources allocated by a
 * previous pci_alloc_msix().  Fails with EBUSY if any message still has
 * an interrupt handler or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1641 
1642 /*
1643  * Return the max supported MSI-X messages this device supports.
1644  * Basically, assuming the MD code can alloc messages, this function
1645  * should return the maximum value that pci_alloc_msix() can return.
1646  * Thus, it is subject to the tunables, etc.
1647  */
1648 int
1649 pci_msix_count_method(device_t dev, device_t child)
1650 {
1651 	struct pci_devinfo *dinfo = device_get_ivars(child);
1652 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1653 
1654 	if (pci_do_msix && msix->msix_location != 0)
1655 		return (msix->msix_msgnum);
1656 	return (0);
1657 }
1658 
1659 /*
1660  * HyperTransport MSI mapping control
1661  */
1662 void
1663 pci_ht_map_msi(device_t dev, uint64_t addr)
1664 {
1665 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1666 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1667 
1668 	if (!ht->ht_msimap)
1669 		return;
1670 
1671 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1672 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1673 		/* Enable MSI -> HT mapping. */
1674 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1675 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1676 		    ht->ht_msictrl, 2);
1677 	}
1678 
1679 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1680 		/* Disable MSI -> HT mapping. */
1681 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1682 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1683 		    ht->ht_msictrl, 2);
1684 	}
1685 }
1686 
1687 int
1688 pci_get_max_read_req(device_t dev)
1689 {
1690 	int cap;
1691 	uint16_t val;
1692 
1693 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1694 		return (0);
1695 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1696 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1697 	val >>= 12;
1698 	return (1 << (val + 7));
1699 }
1700 
1701 int
1702 pci_set_max_read_req(device_t dev, int size)
1703 {
1704 	int cap;
1705 	uint16_t val;
1706 
1707 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1708 		return (0);
1709 	if (size < 128)
1710 		size = 128;
1711 	if (size > 4096)
1712 		size = 4096;
1713 	size = (1 << (fls(size) - 1));
1714 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1715 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1716 	val |= (fls(size) - 8) << 12;
1717 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1718 	return (size);
1719 }
1720 
1721 /*
1722  * Support for MSI message signalled interrupts.
1723  */
1724 void
1725 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1726 {
1727 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1728 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1729 
1730 	/* Write data and address values. */
1731 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1732 	    address & 0xffffffff, 4);
1733 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1734 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1735 		    address >> 32, 4);
1736 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1737 		    data, 2);
1738 	} else
1739 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1740 		    2);
1741 
1742 	/* Enable MSI in the control register. */
1743 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1744 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1745 	    2);
1746 
1747 	/* Enable MSI -> HT mapping. */
1748 	pci_ht_map_msi(dev, address);
1749 }
1750 
1751 void
1752 pci_disable_msi(device_t dev)
1753 {
1754 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1755 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1756 
1757 	/* Disable MSI -> HT mapping. */
1758 	pci_ht_map_msi(dev, 0);
1759 
1760 	/* Disable MSI in the control register. */
1761 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1762 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1763 	    2);
1764 }
1765 
1766 /*
1767  * Restore MSI registers during resume.  If MSI is enabled then
1768  * restore the data and address registers in addition to the control
1769  * register.
1770  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data registers. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capability: high address dword and the
			 * data register at its 64-bit offset. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1796 
1797 static int
1798 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1799 {
1800 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1801 	pcicfgregs *cfg = &dinfo->cfg;
1802 	struct resource_list_entry *rle;
1803 	struct msix_table_entry *mte;
1804 	struct msix_vector *mv;
1805 	uint64_t addr;
1806 	uint32_t data;
1807 	int error, i, j;
1808 
1809 	/*
1810 	 * Handle MSI first.  We try to find this IRQ among our list
1811 	 * of MSI IRQs.  If we find it, we request updated address and
1812 	 * data registers and apply the results.
1813 	 */
1814 	if (cfg->msi.msi_alloc > 0) {
1815 
1816 		/* If we don't have any active handlers, nothing to do. */
1817 		if (cfg->msi.msi_handlers == 0)
1818 			return (0);
1819 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1820 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1821 			    i + 1);
1822 			if (rle->start == irq) {
1823 				error = PCIB_MAP_MSI(device_get_parent(bus),
1824 				    dev, irq, &addr, &data);
1825 				if (error)
1826 					return (error);
1827 				pci_disable_msi(dev);
1828 				dinfo->cfg.msi.msi_addr = addr;
1829 				dinfo->cfg.msi.msi_data = data;
1830 				pci_enable_msi(dev, addr, data);
1831 				return (0);
1832 			}
1833 		}
1834 		return (ENOENT);
1835 	}
1836 
1837 	/*
1838 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1839 	 * we request the updated mapping info.  If that works, we go
1840 	 * through all the slots that use this IRQ and update them.
1841 	 */
1842 	if (cfg->msix.msix_alloc > 0) {
1843 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1844 			mv = &cfg->msix.msix_vectors[i];
1845 			if (mv->mv_irq == irq) {
1846 				error = PCIB_MAP_MSI(device_get_parent(bus),
1847 				    dev, irq, &addr, &data);
1848 				if (error)
1849 					return (error);
1850 				mv->mv_address = addr;
1851 				mv->mv_data = data;
1852 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1853 					mte = &cfg->msix.msix_table[j];
1854 					if (mte->mte_vector != i + 1)
1855 						continue;
1856 					if (mte->mte_handlers == 0)
1857 						continue;
1858 					pci_mask_msix(dev, j);
1859 					pci_enable_msix(dev, j, addr, data);
1860 					pci_unmask_msix(dev, j);
1861 				}
1862 			}
1863 		}
1864 		return (ENOENT);
1865 	}
1866 
1867 	return (ENOENT);
1868 }
1869 
1870 /*
1871  * Returns true if the specified device is blacklisted because MSI
1872  * doesn't work.
1873  */
1874 int
1875 pci_msi_device_blacklisted(device_t dev)
1876 {
1877 	struct pci_quirk *q;
1878 
1879 	if (!pci_honor_msi_blacklist)
1880 		return (0);
1881 
1882 	for (q = &pci_quirks[0]; q->devid; q++) {
1883 		if (q->devid == pci_get_devid(dev) &&
1884 		    q->type == PCI_QUIRK_DISABLE_MSI)
1885 			return (1);
1886 	}
1887 	return (0);
1888 }
1889 
1890 /*
1891  * Returns true if a specified chipset supports MSI when it is
1892  * emulated hardware in a virtual machine.
1893  */
1894 static int
1895 pci_msi_vm_chipset(device_t dev)
1896 {
1897 	struct pci_quirk *q;
1898 
1899 	for (q = &pci_quirks[0]; q->devid; q++) {
1900 		if (q->devid == pci_get_devid(dev) &&
1901 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1902 			return (1);
1903 	}
1904 	return (0);
1905 }
1906 
1907 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1909  * we just check for blacklisted chipsets as represented by the
1910  * host-PCI bridge at device 0:0:0.  In the future, it may become
1911  * necessary to check other system attributes, such as the kenv values
1912  * that give the motherboard manufacturer and model number.
1913  */
1914 static int
1915 pci_msi_blacklisted(void)
1916 {
1917 	device_t dev;
1918 
1919 	if (!pci_honor_msi_blacklist)
1920 		return (0);
1921 
1922 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1923 	if (!(pcie_chipset || pcix_chipset)) {
1924 		if (vm_guest != VM_GUEST_NO) {
1925 			dev = pci_find_bsf(0, 0, 0);
1926 			if (dev != NULL)
1927 				return (pci_msi_vm_chipset(dev) == 0);
1928 		}
1929 		return (1);
1930 	}
1931 
1932 	dev = pci_find_bsf(0, 0, 0);
1933 	if (dev != NULL)
1934 		return (pci_msi_device_blacklisted(dev));
1935 	return (0);
1936 }
1937 
1938 /*
1939  * Attempt to allocate *count MSI messages.  The actual number allocated is
1940  * returned in *count.  After this function returns, each message will be
1941  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1942  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	/* irqs[] receives the vectors handed back by the parent bridge. */
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/*
		 * Try N / 2.  Halving preserves the power-of-2 invariant
		 * checked above.
		 */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  Since 'actual' is a
	 * power of 2, ffs(actual) - 1 is log2(actual), which is the
	 * encoding the Multiple Message Enable field (bits 6:4) expects.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2061 
2062 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other value (success or a
	 * real error) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  While walking
	 * the list, collect the IRQ numbers so they can be handed back
	 * to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2110 
2111 /*
2112  * Return the max supported MSI messages this device supports.
2113  * Basically, assuming the MD code can alloc messages, this function
2114  * should return the maximum value that pci_alloc_msi() can return.
2115  * Thus, it is subject to the tunables, etc.
2116  */
2117 int
2118 pci_msi_count_method(device_t dev, device_t child)
2119 {
2120 	struct pci_devinfo *dinfo = device_get_ivars(child);
2121 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2122 
2123 	if (pci_do_msi && msi->msi_location != 0)
2124 		return (msi->msi_msgnum);
2125 	return (0);
2126 }
2127 
2128 /* free pcicfgregs structure and all depending data structures */
2129 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free cached VPD data, if any was read from the device. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the per-BAR records; _SAFE since each node is freed. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing dinfo itself. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2161 
2162 /*
2163  * PCI power manangement
2164  */
2165 int
2166 pci_set_powerstate_method(device_t dev, device_t child, int state)
2167 {
2168 	struct pci_devinfo *dinfo = device_get_ivars(child);
2169 	pcicfgregs *cfg = &dinfo->cfg;
2170 	uint16_t status;
2171 	int result, oldstate, highest, delay;
2172 
2173 	if (cfg->pp.pp_cap == 0)
2174 		return (EOPNOTSUPP);
2175 
2176 	/*
2177 	 * Optimize a no state change request away.  While it would be OK to
2178 	 * write to the hardware in theory, some devices have shown odd
2179 	 * behavior when going from D3 -> D3.
2180 	 */
2181 	oldstate = pci_get_powerstate(child);
2182 	if (oldstate == state)
2183 		return (0);
2184 
2185 	/*
2186 	 * The PCI power management specification states that after a state
2187 	 * transition between PCI power states, system software must
2188 	 * guarantee a minimal delay before the function accesses the device.
2189 	 * Compute the worst case delay that we need to guarantee before we
2190 	 * access the device.  Many devices will be responsive much more
2191 	 * quickly than this delay, but there are some that don't respond
2192 	 * instantly to state changes.  Transitions to/from D3 state require
2193 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2194 	 * is done below with DELAY rather than a sleeper function because
2195 	 * this function can be called from contexts where we cannot sleep.
2196 	 */
2197 	highest = (oldstate > state) ? oldstate : state;
2198 	if (highest == PCI_POWERSTATE_D3)
2199 	    delay = 10000;
2200 	else if (highest == PCI_POWERSTATE_D2)
2201 	    delay = 200;
2202 	else
2203 	    delay = 0;
2204 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2205 	    & ~PCIM_PSTAT_DMASK;
2206 	result = 0;
2207 	switch (state) {
2208 	case PCI_POWERSTATE_D0:
2209 		status |= PCIM_PSTAT_D0;
2210 		break;
2211 	case PCI_POWERSTATE_D1:
2212 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2213 			return (EOPNOTSUPP);
2214 		status |= PCIM_PSTAT_D1;
2215 		break;
2216 	case PCI_POWERSTATE_D2:
2217 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2218 			return (EOPNOTSUPP);
2219 		status |= PCIM_PSTAT_D2;
2220 		break;
2221 	case PCI_POWERSTATE_D3:
2222 		status |= PCIM_PSTAT_D3;
2223 		break;
2224 	default:
2225 		return (EINVAL);
2226 	}
2227 
2228 	if (bootverbose)
2229 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2230 		    state);
2231 
2232 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2233 	if (delay)
2234 		DELAY(delay);
2235 	return (0);
2236 }
2237 
2238 int
2239 pci_get_powerstate_method(device_t dev, device_t child)
2240 {
2241 	struct pci_devinfo *dinfo = device_get_ivars(child);
2242 	pcicfgregs *cfg = &dinfo->cfg;
2243 	uint16_t status;
2244 	int result;
2245 
2246 	if (cfg->pp.pp_cap != 0) {
2247 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2248 		switch (status & PCIM_PSTAT_DMASK) {
2249 		case PCIM_PSTAT_D0:
2250 			result = PCI_POWERSTATE_D0;
2251 			break;
2252 		case PCIM_PSTAT_D1:
2253 			result = PCI_POWERSTATE_D1;
2254 			break;
2255 		case PCIM_PSTAT_D2:
2256 			result = PCI_POWERSTATE_D2;
2257 			break;
2258 		case PCIM_PSTAT_D3:
2259 			result = PCI_POWERSTATE_D3;
2260 			break;
2261 		default:
2262 			result = PCI_POWERSTATE_UNKNOWN;
2263 			break;
2264 		}
2265 	} else {
2266 		/* No support, device is always at D0 */
2267 		result = PCI_POWERSTATE_D0;
2268 	}
2269 	return (result);
2270 }
2271 
2272 /*
2273  * Some convenience functions for PCI device drivers.
2274  */
2275 
2276 static __inline void
2277 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2278 {
2279 	uint16_t	command;
2280 
2281 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2282 	command |= bit;
2283 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2284 }
2285 
2286 static __inline void
2287 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2288 {
2289 	uint16_t	command;
2290 
2291 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2292 	command &= ~bit;
2293 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2294 }
2295 
/* Enable PCI bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2302 
/* Disable PCI bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2309 
2310 int
2311 pci_enable_io_method(device_t dev, device_t child, int space)
2312 {
2313 	uint16_t bit;
2314 
2315 	switch(space) {
2316 	case SYS_RES_IOPORT:
2317 		bit = PCIM_CMD_PORTEN;
2318 		break;
2319 	case SYS_RES_MEMORY:
2320 		bit = PCIM_CMD_MEMEN;
2321 		break;
2322 	default:
2323 		return (EINVAL);
2324 	}
2325 	pci_set_command_bit(dev, child, bit);
2326 	return (0);
2327 }
2328 
2329 int
2330 pci_disable_io_method(device_t dev, device_t child, int space)
2331 {
2332 	uint16_t bit;
2333 
2334 	switch(space) {
2335 	case SYS_RES_IOPORT:
2336 		bit = PCIM_CMD_PORTEN;
2337 		break;
2338 	case SYS_RES_MEMORY:
2339 		bit = PCIM_CMD_MEMEN;
2340 		break;
2341 	default:
2342 		return (EINVAL);
2343 	}
2344 	pci_clear_command_bit(dev, child, bit);
2345 	return (0);
2346 }
2347 
2348 /*
2349  * New style pci driver.  Parent device is either a pci-host-bridge or a
2350  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2351  */
2352 
/*
 * Dump a device's config-header fields and capability summary to the
 * console.  Only emits output when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTA..INTD, printed as 'a'..'d'. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read status to report the current power state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2409 
2410 static int
2411 pci_porten(device_t dev)
2412 {
2413 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2414 }
2415 
2416 static int
2417 pci_memen(device_t dev)
2418 {
2419 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2420 }
2421 
/*
 * Read a BAR's current value into *mapp and size it by writing all 1's,
 * returning the read-back sizing value in *testvalp.  The original BAR
 * contents are restored before returning.  The statement order here is
 * load-bearing: decode is disabled around the sizing writes and the BAR
 * is restored before decode is re-enabled.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe: all 1's except the enable bit (bit 0). */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR consumes this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2485 
/*
 * Program a BAR with a new base address and refresh the cached
 * pm_value from the hardware.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	/* Low dword first, then the high dword for 64-bit BARs. */
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2506 
2507 struct pci_map *
2508 pci_find_bar(device_t dev, int reg)
2509 {
2510 	struct pci_devinfo *dinfo;
2511 	struct pci_map *pm;
2512 
2513 	dinfo = device_get_ivars(dev);
2514 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2515 		if (pm->pm_reg == reg)
2516 			return (pm);
2517 	}
2518 	return (NULL);
2519 }
2520 
2521 int
2522 pci_bar_enabled(device_t dev, struct pci_map *pm)
2523 {
2524 	struct pci_devinfo *dinfo;
2525 	uint16_t cmd;
2526 
2527 	dinfo = device_get_ivars(dev);
2528 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2529 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2530 		return (0);
2531 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2532 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2533 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2534 	else
2535 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2536 }
2537 
/*
 * Record a new BAR (register offset, raw value, log2 size) for 'dev',
 * keeping the per-device map list sorted by register offset.  Returns
 * the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted by reg. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2562 
/*
 * Re-program every recorded BAR of 'dev' from the cached pm_value,
 * e.g. after the device lost its configuration.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* The ROM BAR is always a 32-bit memory BAR. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		/* 64-bit BARs also need the high dword written. */
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2582 
2583 /*
2584  * Add a resource based on a pci map register. Return 1 if the map
2585  * register is a 32bit map register or 2 if it is a 64bit register.
2586  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR's size in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	/* A 64-bit BAR occupies two consecutive config registers. */
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Bail if the base does not fit in this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/*
	 * A base equal to the sizing test value means the BIOS never
	 * programmed this BAR; ask the parent to pick an address.
	 */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2731 
2732 /*
2733  * For ATA devices we need to decide early what addressing mode to use.
2734  * Legacy demands that the primary and secondary ATA ports sits on the
2735  * same addresses that old ISA hardware did. This dictates that we use
2736  * those addresses and ignore the BAR's if we cannot set PCI native
2737  * addressing mode.
2738  */
2739 static void
2740 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2741     uint32_t prefetchmask)
2742 {
2743 	struct resource *r;
2744 	int rid, type, progif;
2745 #if 0
2746 	/* if this device supports PCI native addressing use it */
2747 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2748 	if ((progif & 0x8a) == 0x8a) {
2749 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2750 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2751 			printf("Trying ATA native PCI addressing mode\n");
2752 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2753 		}
2754 	}
2755 #endif
2756 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2757 	type = SYS_RES_IOPORT;
2758 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2759 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2760 		    prefetchmask & (1 << 0));
2761 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2762 		    prefetchmask & (1 << 1));
2763 	} else {
2764 		rid = PCIR_BAR(0);
2765 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2766 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2767 		    0x1f7, 8, 0);
2768 		rid = PCIR_BAR(1);
2769 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2770 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2771 		    0x3f6, 1, 0);
2772 	}
2773 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2774 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2775 		    prefetchmask & (1 << 2));
2776 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2777 		    prefetchmask & (1 << 3));
2778 	} else {
2779 		rid = PCIR_BAR(2);
2780 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2781 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2782 		    0x177, 8, 0);
2783 		rid = PCIR_BAR(3);
2784 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2785 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2786 		    0x376, 1, 0);
2787 	}
2788 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2789 	    prefetchmask & (1 << 4));
2790 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2791 	    prefetchmask & (1 << 5));
2792 }
2793 
/*
 * Determine the legacy INTx IRQ for 'dev' (tunable override, the
 * intline register, or bus routing) and add it as the rid 0 SYS_RES_IRQ
 * resource.  With force_route set, bus routing is preferred over the
 * intline register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2841 
2842 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the OHCI operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Ask the SMM firmware to hand over the controller. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100ms for the firmware to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			/* Firmware never answered: force a controller reset. */
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2878 
2879 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the controller's interrupt enable register. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2902 
2903 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the EHCI capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended capability list in config space looking for the legacy
	 * support (LEGSUP) capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		/* BIOS semaphore clear: firmware does not own the HC. */
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Set the OS semaphore to request ownership. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2958 
/*
 * Populate the resource list for 'dev': probe its BARs (with special
 * handling for legacy ATA controllers and quirked devices), assign its
 * legacy INTx interrupt, and perform early USB controller takeover from
 * SMM firmware when enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 registers consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3013 
/*
 * Enumerate all slots and functions on the given bus and add a child
 * device for every PCI function that responds.  'dinfo_size' lets
 * subclassed buses (e.g. cardbus) allocate a larger per-device
 * structure; it must be at least sizeof(struct pci_devinfo).
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Read a config register of the slot/function currently being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* NOTE(review): brief pause between slots -- presumably to
		 * let config-space accesses settle; confirm necessity. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip empty slots / bogus header types. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3046 
3047 void
3048 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3049 {
3050 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3051 	device_set_ivars(dinfo->cfg.dev, dinfo);
3052 	resource_list_init(&dinfo->resources);
3053 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3054 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3055 	pci_print_verbose(dinfo);
3056 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3057 }
3058 
/*
 * Generic PCI bus probe.  Returns BUS_PROBE_GENERIC so that more
 * specific bus drivers (ACPI PCI, cardbus, ...) can outbid us.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3068 
3069 static int
3070 pci_attach(device_t dev)
3071 {
3072 	int busno, domain;
3073 
3074 	/*
3075 	 * Since there can be multiple independantly numbered PCI
3076 	 * busses on systems with multiple PCI domains, we can't use
3077 	 * the unit number to decide which bus we are probing. We ask
3078 	 * the parent pcib what our domain and bus numbers are.
3079 	 */
3080 	domain = pcib_get_domain(dev);
3081 	busno = pcib_get_bus(dev);
3082 	if (bootverbose)
3083 		device_printf(dev, "domain=%d, physical bus=%d\n",
3084 		    domain, busno);
3085 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3086 	return (bus_generic_attach(dev));
3087 }
3088 
3089 static void
3090 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3091     int state)
3092 {
3093 	device_t child, pcib;
3094 	struct pci_devinfo *dinfo;
3095 	int dstate, i;
3096 
3097 	/*
3098 	 * Set the device to the given state.  If the firmware suggests
3099 	 * a different power state, use it instead.  If power management
3100 	 * is not present, the firmware is responsible for managing
3101 	 * device power.  Skip children who aren't attached since they
3102 	 * are handled separately.
3103 	 */
3104 	pcib = device_get_parent(dev);
3105 	for (i = 0; i < numdevs; i++) {
3106 		child = devlist[i];
3107 		dinfo = device_get_ivars(child);
3108 		dstate = state;
3109 		if (device_is_attached(child) &&
3110 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3111 			pci_set_powerstate(child, dstate);
3112 	}
3113 }
3114 
3115 int
3116 pci_suspend(device_t dev)
3117 {
3118 	device_t child, *devlist;
3119 	struct pci_devinfo *dinfo;
3120 	int error, i, numdevs;
3121 
3122 	/*
3123 	 * Save the PCI configuration space for each child and set the
3124 	 * device in the appropriate power state for this sleep state.
3125 	 */
3126 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3127 		return (error);
3128 	for (i = 0; i < numdevs; i++) {
3129 		child = devlist[i];
3130 		dinfo = device_get_ivars(child);
3131 		pci_cfg_save(child, dinfo, 0);
3132 	}
3133 
3134 	/* Suspend devices before potentially powering them down. */
3135 	error = bus_generic_suspend(dev);
3136 	if (error) {
3137 		free(devlist, M_TEMP);
3138 		return (error);
3139 	}
3140 	if (pci_do_power_suspend)
3141 		pci_set_power_children(dev, devlist, numdevs,
3142 		    PCI_POWERSTATE_D3);
3143 	free(devlist, M_TEMP);
3144 	return (0);
3145 }
3146 
3147 int
3148 pci_resume(device_t dev)
3149 {
3150 	device_t child, *devlist;
3151 	struct pci_devinfo *dinfo;
3152 	int error, i, numdevs;
3153 
3154 	/*
3155 	 * Set each child to D0 and restore its PCI configuration space.
3156 	 */
3157 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3158 		return (error);
3159 	if (pci_do_power_resume)
3160 		pci_set_power_children(dev, devlist, numdevs,
3161 		    PCI_POWERSTATE_D0);
3162 
3163 	/* Now the device is powered up, restore its config space. */
3164 	for (i = 0; i < numdevs; i++) {
3165 		child = devlist[i];
3166 		dinfo = device_get_ivars(child);
3167 
3168 		pci_cfg_restore(child, dinfo);
3169 		if (!device_is_attached(child))
3170 			pci_cfg_save(child, dinfo, 1);
3171 	}
3172 
3173 	/*
3174 	 * Resume critical devices first, then everything else later.
3175 	 */
3176 	for (i = 0; i < numdevs; i++) {
3177 		child = devlist[i];
3178 		switch (pci_get_class(child)) {
3179 		case PCIC_DISPLAY:
3180 		case PCIC_MEMORY:
3181 		case PCIC_BRIDGE:
3182 		case PCIC_BASEPERIPH:
3183 			DEVICE_RESUME(child);
3184 			break;
3185 		}
3186 	}
3187 	for (i = 0; i < numdevs; i++) {
3188 		child = devlist[i];
3189 		switch (pci_get_class(child)) {
3190 		case PCIC_DISPLAY:
3191 		case PCIC_MEMORY:
3192 		case PCIC_BRIDGE:
3193 		case PCIC_BASEPERIPH:
3194 			break;
3195 		default:
3196 			DEVICE_RESUME(child);
3197 		}
3198 	}
3199 	free(devlist, M_TEMP);
3200 	return (0);
3201 }
3202 
/*
 * Locate the preloaded "pci_vendor_data" module (the flat-text PCI
 * vendor/device database, see the format description further below)
 * and publish its address/size via pci_vendordata and
 * pci_vendordata_size.  No-op if the loader did not preload it.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this stores one byte at index
			 * pci_vendordata_size, i.e. just past the
			 * reported size -- presumably the preload area
			 * has that slack; verify against the loader.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3222 
3223 void
3224 pci_driver_added(device_t dev, driver_t *driver)
3225 {
3226 	int numdevs;
3227 	device_t *devlist;
3228 	device_t child;
3229 	struct pci_devinfo *dinfo;
3230 	int i;
3231 
3232 	if (bootverbose)
3233 		device_printf(dev, "driver added\n");
3234 	DEVICE_IDENTIFY(driver, dev);
3235 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3236 		return;
3237 	for (i = 0; i < numdevs; i++) {
3238 		child = devlist[i];
3239 		if (device_get_state(child) != DS_NOTPRESENT)
3240 			continue;
3241 		dinfo = device_get_ivars(child);
3242 		pci_print_verbose(dinfo);
3243 		if (bootverbose)
3244 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3245 		pci_cfg_restore(child, dinfo);
3246 		if (device_probe_and_attach(child) != 0)
3247 			pci_cfg_save(child, dinfo, 1);
3248 	}
3249 	free(devlist, M_TEMP);
3250 }
3251 
/*
 * Bus setup_intr method.  After installing the handler via the
 * generic code, program the child's interrupt delivery: rid 0 is the
 * legacy INTx pin (re-enable it in the command register); any other
 * rid is an MSI or MSI-X message, which is mapped through the parent
 * bridge and enabled when its first handler is installed.  On any
 * failure after the handler is installed, the handler is torn down
 * again before returning the error.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vectors lazily on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* First handler enables MSI in the capability. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler programs and unmasks the entry. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * On the success path 'error' is 0 here, so the check
		 * below only fires when reached via 'goto bad'.
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3343 
3344 int
3345 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3346     void *cookie)
3347 {
3348 	struct msix_table_entry *mte;
3349 	struct resource_list_entry *rle;
3350 	struct pci_devinfo *dinfo;
3351 	int error, rid;
3352 
3353 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3354 		return (EINVAL);
3355 
3356 	/* If this isn't a direct child, just bail out */
3357 	if (device_get_parent(child) != dev)
3358 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3359 
3360 	rid = rman_get_rid(irq);
3361 	if (rid == 0) {
3362 		/* Mask INTx */
3363 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3364 	} else {
3365 		/*
3366 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3367 		 * decrement the appropriate handlers count and mask the
3368 		 * MSI-X message, or disable MSI messages if the count
3369 		 * drops to 0.
3370 		 */
3371 		dinfo = device_get_ivars(child);
3372 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3373 		if (rle->res != irq)
3374 			return (EINVAL);
3375 		if (dinfo->cfg.msi.msi_alloc > 0) {
3376 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3377 			    ("MSI-X index too high"));
3378 			if (dinfo->cfg.msi.msi_handlers == 0)
3379 				return (EINVAL);
3380 			dinfo->cfg.msi.msi_handlers--;
3381 			if (dinfo->cfg.msi.msi_handlers == 0)
3382 				pci_disable_msi(child);
3383 		} else {
3384 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3385 			    ("No MSI or MSI-X interrupts allocated"));
3386 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3387 			    ("MSI-X index too high"));
3388 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3389 			if (mte->mte_handlers == 0)
3390 				return (EINVAL);
3391 			mte->mte_handlers--;
3392 			if (mte->mte_handlers == 0)
3393 				pci_mask_msix(child, rid - 1);
3394 		}
3395 	}
3396 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3397 	if (rid > 0)
3398 		KASSERT(error == 0,
3399 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3400 	return (error);
3401 }
3402 
3403 int
3404 pci_print_child(device_t dev, device_t child)
3405 {
3406 	struct pci_devinfo *dinfo;
3407 	struct resource_list *rl;
3408 	int retval = 0;
3409 
3410 	dinfo = device_get_ivars(child);
3411 	rl = &dinfo->resources;
3412 
3413 	retval += bus_print_child_header(dev, child);
3414 
3415 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3416 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3417 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3418 	if (device_get_flags(dev))
3419 		retval += printf(" flags %#x", device_get_flags(dev));
3420 
3421 	retval += printf(" at device %d.%d", pci_get_slot(child),
3422 	    pci_get_function(child));
3423 
3424 	retval += bus_print_child_footer(dev, child);
3425 
3426 	return (retval);
3427 }
3428 
/*
 * Class/subclass -> human-readable description table, consulted by
 * pci_probe_nomatch() when a device has no database entry.  An entry
 * with subclass -1 supplies the generic description for its class;
 * a later matching subclass entry refines it.  The table ends with a
 * NULL desc sentinel.
 */
static struct
{
	int	class;		/* PCI base class (PCIC_*) */
	int	subclass;	/* PCI subclass (PCIS_*), or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3520 
3521 void
3522 pci_probe_nomatch(device_t dev, device_t child)
3523 {
3524 	int	i;
3525 	char	*cp, *scp, *device;
3526 
3527 	/*
3528 	 * Look for a listing for this device in a loaded device database.
3529 	 */
3530 	if ((device = pci_describe_device(child)) != NULL) {
3531 		device_printf(dev, "<%s>", device);
3532 		free(device, M_DEVBUF);
3533 	} else {
3534 		/*
3535 		 * Scan the class/subclass descriptions for a general
3536 		 * description.
3537 		 */
3538 		cp = "unknown";
3539 		scp = NULL;
3540 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3541 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3542 				if (pci_nomatch_tab[i].subclass == -1) {
3543 					cp = pci_nomatch_tab[i].desc;
3544 				} else if (pci_nomatch_tab[i].subclass ==
3545 				    pci_get_subclass(child)) {
3546 					scp = pci_nomatch_tab[i].desc;
3547 				}
3548 			}
3549 		}
3550 		device_printf(dev, "<%s%s%s>",
3551 		    cp ? cp : "",
3552 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3553 		    scp ? scp : "");
3554 	}
3555 	printf(" at device %d.%d (no driver attached)\n",
3556 	    pci_get_slot(child), pci_get_function(child));
3557 	pci_cfg_save(child, device_get_ivars(child), 1);
3558 	return;
3559 }
3560 
3561 /*
3562  * Parse the PCI device database, if loaded, and return a pointer to a
3563  * description of the device.
3564  *
3565  * The database is flat text formatted as follows:
3566  *
3567  * Any line not in a valid format is ignored.
3568  * Lines are terminated with newline '\n' characters.
3569  *
3570  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3571  * the vendor name.
3572  *
3573  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3574  * - devices cannot be listed without a corresponding VENDOR line.
3575  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3576  * another TAB, then the device name.
3577  */
3578 
3579 /*
3580  * Assuming (ptr) points to the beginning of a line in the database,
3581  * return the vendor or device and description of the next entry.
3582  * The value of (vendor) or (device) inappropriate for the entry type
3583  * is set to -1.  Returns nonzero at the end of the database.
3584  *
3585  * Note that this is slightly unrobust in the face of corrupt data;
3586  * we attempt to safeguard against this by spamming the end of the
3587  * database with a newline when we initialise.
3588  */
3589 static int
3590 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3591 {
3592 	char	*cp = *ptr;
3593 	int	left;
3594 
3595 	*device = -1;
3596 	*vendor = -1;
3597 	**desc = '\0';
3598 	for (;;) {
3599 		left = pci_vendordata_size - (cp - pci_vendordata);
3600 		if (left <= 0) {
3601 			*ptr = cp;
3602 			return(1);
3603 		}
3604 
3605 		/* vendor entry? */
3606 		if (*cp != '\t' &&
3607 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3608 			break;
3609 		/* device entry? */
3610 		if (*cp == '\t' &&
3611 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3612 			break;
3613 
3614 		/* skip to next line */
3615 		while (*cp != '\n' && left > 0) {
3616 			cp++;
3617 			left--;
3618 		}
3619 		if (*cp == '\n') {
3620 			cp++;
3621 			left--;
3622 		}
3623 	}
3624 	/* skip to next line */
3625 	while (*cp != '\n' && left > 0) {
3626 		cp++;
3627 		left--;
3628 	}
3629 	if (*cp == '\n' && left > 0)
3630 		cp++;
3631 	*ptr = cp;
3632 	return(0);
3633 }
3634 
/*
 * Build a "vendor, device" description string for 'dev' from the
 * loaded vendor database.  Returns a malloc(M_DEVBUF)'d string the
 * caller must free, or NULL if no database is loaded, the vendor is
 * unknown, or allocation fails.  An unknown device id under a known
 * vendor is rendered as its hex value.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers for the parsed descriptions. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/* Scan this vendor's device entries until the next vendor line. */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Hit the next vendor: device not listed. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device under a known vendor: show the raw id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3687 
/*
 * Bus read_ivar method: serve PCI instance variables from the cached
 * config registers in the child's pci_devinfo.  Returns ENOENT for
 * unknown ivars.  PCI_IVAR_ETHADDR is unsupported here: the result
 * pointer is set to NULL and EINVAL returned, since the generic
 * accessor does not otherwise propagate failure.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, matching pci_quirks encoding. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3770 
3771 int
3772 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3773 {
3774 	struct pci_devinfo *dinfo;
3775 
3776 	dinfo = device_get_ivars(child);
3777 
3778 	switch (which) {
3779 	case PCI_IVAR_INTPIN:
3780 		dinfo->cfg.intpin = value;
3781 		return (0);
3782 	case PCI_IVAR_ETHADDR:
3783 	case PCI_IVAR_SUBVENDOR:
3784 	case PCI_IVAR_SUBDEVICE:
3785 	case PCI_IVAR_VENDOR:
3786 	case PCI_IVAR_DEVICE:
3787 	case PCI_IVAR_DEVID:
3788 	case PCI_IVAR_CLASS:
3789 	case PCI_IVAR_SUBCLASS:
3790 	case PCI_IVAR_PROGIF:
3791 	case PCI_IVAR_REVID:
3792 	case PCI_IVAR_IRQ:
3793 	case PCI_IVAR_DOMAIN:
3794 	case PCI_IVAR_BUS:
3795 	case PCI_IVAR_SLOT:
3796 	case PCI_IVAR_FUNCTION:
3797 		return (EINVAL);	/* disallow for now */
3798 
3799 	default:
3800 		return (ENOENT);
3801 	}
3802 }
3803 
3804 
3805 #include "opt_ddb.h"
3806 #ifdef DDB
3807 #include <ddb/ddb.h>
3808 #include <sys/cons.h>
3809 
3810 /*
3811  * List resources based on pci map registers, used for within ddb
3812  */
3813 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line per device (driver name/unit, location, class,
 * subsystem, chip id, revision, header type).  DB_SHOW_COMMAND
 * supplies the standard ddb command arguments implicitly.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "none<N>" with a counter. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3853 #endif /* DDB */
3854 
/*
 * Lazily reserve the backing resource for a BAR on first allocation.
 * Sizes the BAR (or reuses a previously probed pci_map), validates
 * that the requested resource type matches the BAR type, allocates a
 * suitably sized and aligned range from the parent, records it in the
 * child's resource list as RLE_RESERVED, and programs the BAR with
 * the assigned address.  Returns the (inactive) resource, or NULL.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3960 
3961 
/*
 * Bus alloc_resource method.  Direct children get lazy allocation:
 * legacy interrupts are routed on demand (and refused once MSI/MSI-X
 * is in use), and memory/port BARs are reserved via pci_reserve_map()
 * the first time they are requested.  Requests from grandchildren are
 * passed straight up the tree.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-allocation of) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4032 
4033 int
4034 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4035     struct resource *r)
4036 {
4037 	struct pci_devinfo *dinfo;
4038 	int error;
4039 
4040 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4041 	if (error)
4042 		return (error);
4043 
4044 	/* Enable decoding in the command register when activating BARs. */
4045 	if (device_get_parent(child) == dev) {
4046 		/* Device ROMs need their decoding explicitly enabled. */
4047 		dinfo = device_get_ivars(child);
4048 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4049 			pci_write_bar(child, pci_find_bar(child, rid),
4050 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4051 		switch (type) {
4052 		case SYS_RES_IOPORT:
4053 		case SYS_RES_MEMORY:
4054 			error = PCI_ENABLE_IO(dev, child, type);
4055 			break;
4056 		}
4057 	}
4058 	return (error);
4059 }
4060 
4061 int
4062 pci_deactivate_resource(device_t dev, device_t child, int type,
4063     int rid, struct resource *r)
4064 {
4065 	struct pci_devinfo *dinfo;
4066 	int error;
4067 
4068 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4069 	if (error)
4070 		return (error);
4071 
4072 	/* Disable decoding for device ROMs. */
4073 	if (device_get_parent(child) == dev) {
4074 		dinfo = device_get_ivars(child);
4075 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4076 			pci_write_bar(child, pci_find_bar(child, rid),
4077 			    rman_get_start(r));
4078 	}
4079 	return (0);
4080 }
4081 
/*
 * Detach and destroy a child PCI device, releasing every resource that
 * was reserved for it.  Decoding is disabled in the command register
 * before the resources are torn down so the device cannot claim
 * addresses we are about to hand back.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A still-active or busy resource means the
			 * child's driver failed to release it; complain
			 * and force-release so the unreserve below can
			 * succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4121 
/*
 * Bus method: delete a single resource-list entry for a child device.
 * Refuses to delete a resource the child still has active or busy.
 * For BAR-backed entries the BAR is cleared first (where writing zero
 * is a valid "disabled" value) so the device stops decoding before the
 * address range is returned to the parent.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4164 
4165 struct resource_list *
4166 pci_get_resource_list (device_t dev, device_t child)
4167 {
4168 	struct pci_devinfo *dinfo = device_get_ivars(child);
4169 
4170 	return (&dinfo->resources);
4171 }
4172 
4173 uint32_t
4174 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4175 {
4176 	struct pci_devinfo *dinfo = device_get_ivars(child);
4177 	pcicfgregs *cfg = &dinfo->cfg;
4178 
4179 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4180 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4181 }
4182 
4183 void
4184 pci_write_config_method(device_t dev, device_t child, int reg,
4185     uint32_t val, int width)
4186 {
4187 	struct pci_devinfo *dinfo = device_get_ivars(child);
4188 	pcicfgregs *cfg = &dinfo->cfg;
4189 
4190 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4191 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4192 }
4193 
4194 int
4195 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4196     size_t buflen)
4197 {
4198 
4199 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4200 	    pci_get_function(child));
4201 	return (0);
4202 }
4203 
4204 int
4205 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4206     size_t buflen)
4207 {
4208 	struct pci_devinfo *dinfo;
4209 	pcicfgregs *cfg;
4210 
4211 	dinfo = device_get_ivars(child);
4212 	cfg = &dinfo->cfg;
4213 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4214 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4215 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4216 	    cfg->progif);
4217 	return (0);
4218 }
4219 
4220 int
4221 pci_assign_interrupt_method(device_t dev, device_t child)
4222 {
4223 	struct pci_devinfo *dinfo = device_get_ivars(child);
4224 	pcicfgregs *cfg = &dinfo->cfg;
4225 
4226 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4227 	    cfg->intpin));
4228 }
4229 
4230 static int
4231 pci_modevent(module_t mod, int what, void *arg)
4232 {
4233 	static struct cdev *pci_cdev;
4234 
4235 	switch (what) {
4236 	case MOD_LOAD:
4237 		STAILQ_INIT(&pci_devq);
4238 		pci_generation = 0;
4239 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4240 		    "pci");
4241 		pci_load_vendor_data();
4242 		break;
4243 
4244 	case MOD_UNLOAD:
4245 		destroy_dev(pci_cdev);
4246 		break;
4247 	}
4248 
4249 	return (0);
4250 }
4251 
/*
 * Restore a device's saved config-space state (BARs, command register,
 * interrupt routing, timing registers) after a suspend or reset, then
 * re-enable any MSI/MSI-X configuration that was in use.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4293 
/*
 * Save a device's writable config-space registers into the cached copy
 * in dinfo so pci_cfg_restore() can put them back later.  If setstate
 * is non-zero and policy (pci_do_power_nodriver) allows it for this
 * device class, the device is also powered down to D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4373