xref: /freebsd/sys/dev/pci/pci.c (revision 6f9c8e5b074419423648ffb89b83fd2f257e90b7)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
/*
 * Evaluate to true when config register 'reg' is the expansion ROM (BIOS)
 * base address register for cfg's header type (type 0 devices use
 * PCIR_BIOS, type 1 bridges use PCIR_BIOS_1).
 *
 * Note: 'reg' is parenthesized so that callers may pass an arbitrary
 * expression without precedence surprises.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
75 
76 
77 static pci_addr_t	pci_mapbase(uint64_t mapreg);
78 static const char	*pci_maptype(uint64_t mapreg);
79 static int		pci_mapsize(uint64_t testval);
80 static int		pci_maprange(uint64_t mapreg);
81 static pci_addr_t	pci_rombase(uint64_t mapreg);
82 static int		pci_romsize(uint64_t testval);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85 
86 static int		pci_porten(device_t dev);
87 static int		pci_memen(device_t dev);
88 static void		pci_assign_interrupt(device_t bus, device_t dev,
89 			    int force_route);
90 static int		pci_add_map(device_t bus, device_t dev, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_enable_msix(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix(device_t dev, u_int index);
115 static void		pci_unmask_msix(device_t dev, u_int index);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
/*
 * Device method table for the PCI bus driver: binds the device, bus,
 * and PCI kobj interfaces to their pci(4) implementations.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
175 
/* Declare the "pci" driver class and register it against pcib parents. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database; loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
184 
185 
/*
 * A device-specific quirk, keyed on the combined device(hi)/vendor(lo)
 * config ID word.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* meaning depends on 'type' (e.g. register offset) */
	int	arg2;	/* meaning depends on 'type' */
};
195 
/* Quirk table; scanned linearly, terminated by the all-zero entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* terminator */
};
236 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* list of all enumerated PCI devices */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set in pci_read_cap() once a PCIe / PCI-X capable device is seen. */
static int pcie_chipset, pcix_chipset;
246 
247 /* sysctl vars */
248 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
249 
250 static int pci_enable_io_modes = 1;
251 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
252 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
253     &pci_enable_io_modes, 1,
254     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
255 enable these bits correctly.  We'd like to do this all the time, but there\n\
256 are some peripherals that this causes problems with.");
257 
258 static int pci_do_power_nodriver = 0;
259 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
260 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
261     &pci_do_power_nodriver, 0,
262   "Place a function into D3 state when no driver attaches to it.  0 means\n\
263 disable.  1 means conservatively place devices into D3 state.  2 means\n\
264 agressively place devices into D3 state.  3 means put absolutely everything\n\
265 in D3 state.");
266 
267 int pci_do_power_resume = 1;
268 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
269 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
270     &pci_do_power_resume, 1,
271   "Transition from D3 -> D0 on resume.");
272 
273 int pci_do_power_suspend = 1;
274 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
275 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
276     &pci_do_power_suspend, 1,
277   "Transition from D0 -> D3 on suspend.");
278 
279 static int pci_do_msi = 1;
280 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
281 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
282     "Enable support for MSI interrupts");
283 
284 static int pci_do_msix = 1;
285 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
287     "Enable support for MSI-X interrupts");
288 
289 static int pci_honor_msi_blacklist = 1;
290 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
291 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
292     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
293 
294 #if defined(__i386__) || defined(__amd64__)
295 static int pci_usb_takeover = 1;
296 #else
297 static int pci_usb_takeover = 0;
298 #endif
299 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
300 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
301     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
302 Disable this if you depend on BIOS emulation of USB devices, that is\n\
303 you use USB devices (like keyboard or mouse) but do not load USB drivers");
304 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience wrapper
 * around pci_find_dbsf(); returns NULL if no enumerated device matches.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
313 
314 /* Find a device_t by domain/bus/slot/function */
315 
316 device_t
317 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
318 {
319 	struct pci_devinfo *dinfo;
320 
321 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322 		if ((dinfo->cfg.domain == domain) &&
323 		    (dinfo->cfg.bus == bus) &&
324 		    (dinfo->cfg.slot == slot) &&
325 		    (dinfo->cfg.func == func)) {
326 			return (dinfo->cfg.dev);
327 		}
328 	}
329 
330 	return (NULL);
331 }
332 
333 /* Find a device_t by vendor/device ID */
334 
335 device_t
336 pci_find_device(uint16_t vendor, uint16_t device)
337 {
338 	struct pci_devinfo *dinfo;
339 
340 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
341 		if ((dinfo->cfg.vendor == vendor) &&
342 		    (dinfo->cfg.device == device)) {
343 			return (dinfo->cfg.dev);
344 		}
345 	}
346 
347 	return (NULL);
348 }
349 
350 static int
351 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
352 {
353 	va_list ap;
354 	int retval;
355 
356 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
357 	    cfg->func);
358 	va_start(ap, fmt);
359 	retval += vprintf(fmt, ap);
360 	va_end(ap);
361 	return (retval);
362 }
363 
364 /* return base address of memory or port map */
365 
366 static pci_addr_t
367 pci_mapbase(uint64_t mapreg)
368 {
369 
370 	if (PCI_BAR_MEM(mapreg))
371 		return (mapreg & PCIM_BAR_MEM_BASE);
372 	else
373 		return (mapreg & PCIM_BAR_IO_BASE);
374 }
375 
376 /* return map type of memory or port map */
377 
378 static const char *
379 pci_maptype(uint64_t mapreg)
380 {
381 
382 	if (PCI_BAR_IO(mapreg))
383 		return ("I/O Port");
384 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
385 		return ("Prefetchable Memory");
386 	return ("Memory");
387 }
388 
/* Return log2 of the map size decoded for a memory or port map. */

static int
pci_mapsize(uint64_t testval)
{
	int ln2size;

	/* Count trailing zero bits of the masked base; 0 yields 0. */
	ln2size = 0;
	for (testval = pci_mapbase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
407 
/* Return the base address of a device ROM from its BIOS map register. */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
416 
/* Return log2 of the map size decoded for a device ROM. */

static int
pci_romsize(uint64_t testval)
{
	int ln2size;

	/* Count trailing zero bits of the masked ROM base; 0 yields 0. */
	ln2size = 0;
	for (testval = pci_rombase(testval);
	    testval != 0 && (testval & 1) == 0; testval >>= 1)
		ln2size++;
	return (ln2size);
}
435 
436 /* return log2 of address range supported by map register */
437 
438 static int
439 pci_maprange(uint64_t mapreg)
440 {
441 	int ln2range = 0;
442 
443 	if (PCI_BAR_IO(mapreg))
444 		ln2range = 32;
445 	else
446 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
447 		case PCIM_BAR_MEM_32:
448 			ln2range = 32;
449 			break;
450 		case PCIM_BAR_MEM_1MB:
451 			ln2range = 20;
452 			break;
453 		case PCIM_BAR_MEM_64:
454 			ln2range = 64;
455 			break;
456 		}
457 	return (ln2range);
458 }
459 
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only plain (type 0) headers need fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
472 
/*
 * Extract header-type specific config data: subvendor/subdevice IDs
 * (where the header type defines them) and the number of BARs.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges have no fixed subvendor registers (see PCIY_SUBVENDOR). */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
496 
/*
 * Read the configuration header of the function at d:b:s:f into a newly
 * allocated pci_devinfo of 'size' bytes and link it onto the global
 * device list.  Returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means nothing decodes here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* Defensive: M_WAITOK should not return NULL. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
572 
/*
 * Walk the device's PCI capability list and record the location and
 * contents of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express)
 * in *cfg.  Also sets the global pcix_chipset/pcie_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets live in the 256-byte config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
730 
731 /*
732  * PCI Vital Product Data
733  */
734 
735 #define	PCI_VPD_TIMEOUT		1000000
736 
737 static int
738 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
739 {
740 	int count = PCI_VPD_TIMEOUT;
741 
742 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
743 
744 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
745 
746 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
747 		if (--count < 0)
748 			return (ENXIO);
749 		DELAY(1);	/* limit looping */
750 	}
751 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
752 
753 	return (0);
754 }
755 
#if 0
/*
 * Write a 32-bit VPD dword at offset 'reg' (currently unused, kept for
 * reference).  Returns 0 on success or ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Hardware clears bit 15 of the address register when done. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
777 
/* Cursor state for the byte-at-a-time VPD reader (see vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next dword offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
786 
787 static int
788 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
789 {
790 	uint32_t reg;
791 	uint8_t byte;
792 
793 	if (vrs->bytesinval == 0) {
794 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
795 			return (ENXIO);
796 		vrs->val = le32toh(reg);
797 		vrs->off += 4;
798 		byte = vrs->val & 0xff;
799 		vrs->bytesinval = 3;
800 	} else {
801 		vrs->val = vrs->val >> 8;
802 		byte = vrs->val & 0xff;
803 		vrs->bytesinval--;
804 	}
805 
806 	vrs->cksum += byte;
807 	*data = byte;
808 	return (0);
809 }
810 
811 static void
812 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
813 {
814 	struct vpd_readstate vrs;
815 	int state;
816 	int name;
817 	int remain;
818 	int i;
819 	int alloc, off;		/* alloc/off for RO/W arrays */
820 	int cksumvalid;
821 	int dflen;
822 	uint8_t byte;
823 	uint8_t byte2;
824 
825 	/* init vpd reader */
826 	vrs.bytesinval = 0;
827 	vrs.off = 0;
828 	vrs.pcib = pcib;
829 	vrs.cfg = cfg;
830 	vrs.cksum = 0;
831 
832 	state = 0;
833 	name = remain = i = 0;	/* shut up stupid gcc */
834 	alloc = off = 0;	/* shut up stupid gcc */
835 	dflen = 0;		/* shut up stupid gcc */
836 	cksumvalid = -1;
837 	while (state >= 0) {
838 		if (vpd_nextbyte(&vrs, &byte)) {
839 			state = -2;
840 			break;
841 		}
842 #if 0
843 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
844 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
845 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
846 #endif
847 		switch (state) {
848 		case 0:		/* item name */
849 			if (byte & 0x80) {
850 				if (vpd_nextbyte(&vrs, &byte2)) {
851 					state = -2;
852 					break;
853 				}
854 				remain = byte2;
855 				if (vpd_nextbyte(&vrs, &byte2)) {
856 					state = -2;
857 					break;
858 				}
859 				remain |= byte2 << 8;
860 				if (remain > (0x7f*4 - vrs.off)) {
861 					state = -1;
862 					printf(
863 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
864 					    cfg->domain, cfg->bus, cfg->slot,
865 					    cfg->func, remain);
866 				}
867 				name = byte & 0x7f;
868 			} else {
869 				remain = byte & 0x7;
870 				name = (byte >> 3) & 0xf;
871 			}
872 			switch (name) {
873 			case 0x2:	/* String */
874 				cfg->vpd.vpd_ident = malloc(remain + 1,
875 				    M_DEVBUF, M_WAITOK);
876 				i = 0;
877 				state = 1;
878 				break;
879 			case 0xf:	/* End */
880 				state = -1;
881 				break;
882 			case 0x10:	/* VPD-R */
883 				alloc = 8;
884 				off = 0;
885 				cfg->vpd.vpd_ros = malloc(alloc *
886 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
887 				    M_WAITOK | M_ZERO);
888 				state = 2;
889 				break;
890 			case 0x11:	/* VPD-W */
891 				alloc = 8;
892 				off = 0;
893 				cfg->vpd.vpd_w = malloc(alloc *
894 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
895 				    M_WAITOK | M_ZERO);
896 				state = 5;
897 				break;
898 			default:	/* Invalid data, abort */
899 				state = -1;
900 				break;
901 			}
902 			break;
903 
904 		case 1:	/* Identifier String */
905 			cfg->vpd.vpd_ident[i++] = byte;
906 			remain--;
907 			if (remain == 0)  {
908 				cfg->vpd.vpd_ident[i] = '\0';
909 				state = 0;
910 			}
911 			break;
912 
913 		case 2:	/* VPD-R Keyword Header */
914 			if (off == alloc) {
915 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
916 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
917 				    M_DEVBUF, M_WAITOK | M_ZERO);
918 			}
919 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
920 			if (vpd_nextbyte(&vrs, &byte2)) {
921 				state = -2;
922 				break;
923 			}
924 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
925 			if (vpd_nextbyte(&vrs, &byte2)) {
926 				state = -2;
927 				break;
928 			}
929 			dflen = byte2;
930 			if (dflen == 0 &&
931 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
932 			    2) == 0) {
933 				/*
934 				 * if this happens, we can't trust the rest
935 				 * of the VPD.
936 				 */
937 				printf(
938 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
939 				    cfg->domain, cfg->bus, cfg->slot,
940 				    cfg->func, dflen);
941 				cksumvalid = 0;
942 				state = -1;
943 				break;
944 			} else if (dflen == 0) {
945 				cfg->vpd.vpd_ros[off].value = malloc(1 *
946 				    sizeof(*cfg->vpd.vpd_ros[off].value),
947 				    M_DEVBUF, M_WAITOK);
948 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
949 			} else
950 				cfg->vpd.vpd_ros[off].value = malloc(
951 				    (dflen + 1) *
952 				    sizeof(*cfg->vpd.vpd_ros[off].value),
953 				    M_DEVBUF, M_WAITOK);
954 			remain -= 3;
955 			i = 0;
956 			/* keep in sync w/ state 3's transistions */
957 			if (dflen == 0 && remain == 0)
958 				state = 0;
959 			else if (dflen == 0)
960 				state = 2;
961 			else
962 				state = 3;
963 			break;
964 
965 		case 3:	/* VPD-R Keyword Value */
966 			cfg->vpd.vpd_ros[off].value[i++] = byte;
967 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
968 			    "RV", 2) == 0 && cksumvalid == -1) {
969 				if (vrs.cksum == 0)
970 					cksumvalid = 1;
971 				else {
972 					if (bootverbose)
973 						printf(
974 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
975 						    cfg->domain, cfg->bus,
976 						    cfg->slot, cfg->func,
977 						    vrs.cksum);
978 					cksumvalid = 0;
979 					state = -1;
980 					break;
981 				}
982 			}
983 			dflen--;
984 			remain--;
985 			/* keep in sync w/ state 2's transistions */
986 			if (dflen == 0)
987 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
988 			if (dflen == 0 && remain == 0) {
989 				cfg->vpd.vpd_rocnt = off;
990 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
991 				    off * sizeof(*cfg->vpd.vpd_ros),
992 				    M_DEVBUF, M_WAITOK | M_ZERO);
993 				state = 0;
994 			} else if (dflen == 0)
995 				state = 2;
996 			break;
997 
998 		case 4:
999 			remain--;
1000 			if (remain == 0)
1001 				state = 0;
1002 			break;
1003 
1004 		case 5:	/* VPD-W Keyword Header */
1005 			if (off == alloc) {
1006 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1007 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1008 				    M_DEVBUF, M_WAITOK | M_ZERO);
1009 			}
1010 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1011 			if (vpd_nextbyte(&vrs, &byte2)) {
1012 				state = -2;
1013 				break;
1014 			}
1015 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1016 			if (vpd_nextbyte(&vrs, &byte2)) {
1017 				state = -2;
1018 				break;
1019 			}
1020 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1021 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1022 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1023 			    sizeof(*cfg->vpd.vpd_w[off].value),
1024 			    M_DEVBUF, M_WAITOK);
1025 			remain -= 3;
1026 			i = 0;
1027 			/* keep in sync w/ state 6's transistions */
1028 			if (dflen == 0 && remain == 0)
1029 				state = 0;
1030 			else if (dflen == 0)
1031 				state = 5;
1032 			else
1033 				state = 6;
1034 			break;
1035 
1036 		case 6:	/* VPD-W Keyword Value */
1037 			cfg->vpd.vpd_w[off].value[i++] = byte;
1038 			dflen--;
1039 			remain--;
1040 			/* keep in sync w/ state 5's transistions */
1041 			if (dflen == 0)
1042 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1043 			if (dflen == 0 && remain == 0) {
1044 				cfg->vpd.vpd_wcnt = off;
1045 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1046 				    off * sizeof(*cfg->vpd.vpd_w),
1047 				    M_DEVBUF, M_WAITOK | M_ZERO);
1048 				state = 0;
1049 			} else if (dflen == 0)
1050 				state = 5;
1051 			break;
1052 
1053 		default:
1054 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1055 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1056 			    state);
1057 			state = -1;
1058 			break;
1059 		}
1060 	}
1061 
1062 	if (cksumvalid == 0 || state < -1) {
1063 		/* read-only data bad, clean up */
1064 		if (cfg->vpd.vpd_ros != NULL) {
1065 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1066 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1067 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1068 			cfg->vpd.vpd_ros = NULL;
1069 		}
1070 	}
1071 	if (state < -1) {
1072 		/* I/O error, clean up */
1073 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1074 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1075 		if (cfg->vpd.vpd_ident != NULL) {
1076 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1077 			cfg->vpd.vpd_ident = NULL;
1078 		}
1079 		if (cfg->vpd.vpd_w != NULL) {
1080 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1081 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1082 			free(cfg->vpd.vpd_w, M_DEVBUF);
1083 			cfg->vpd.vpd_w = NULL;
1084 		}
1085 	}
1086 	cfg->vpd.vpd_cached = 1;
1087 #undef REG
1088 #undef WREG
1089 }
1090 
1091 int
1092 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1093 {
1094 	struct pci_devinfo *dinfo = device_get_ivars(child);
1095 	pcicfgregs *cfg = &dinfo->cfg;
1096 
1097 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1098 		pci_read_vpd(device_get_parent(dev), cfg);
1099 
1100 	*identptr = cfg->vpd.vpd_ident;
1101 
1102 	if (*identptr == NULL)
1103 		return (ENXIO);
1104 
1105 	return (0);
1106 }
1107 
1108 int
1109 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1110 	const char **vptr)
1111 {
1112 	struct pci_devinfo *dinfo = device_get_ivars(child);
1113 	pcicfgregs *cfg = &dinfo->cfg;
1114 	int i;
1115 
1116 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1117 		pci_read_vpd(device_get_parent(dev), cfg);
1118 
1119 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1120 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1121 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1122 			*vptr = cfg->vpd.vpd_ros[i].value;
1123 		}
1124 
1125 	if (i != cfg->vpd.vpd_rocnt)
1126 		return (0);
1127 
1128 	*vptr = NULL;
1129 	return (ENXIO);
1130 }
1131 
1132 /*
1133  * Find the requested extended capability and return the offset in
1134  * configuration space via the pointer provided. The function returns
1135  * 0 on success and error code otherwise.
1136  */
1137 int
1138 pci_find_extcap_method(device_t dev, device_t child, int capability,
1139     int *capreg)
1140 {
1141 	struct pci_devinfo *dinfo = device_get_ivars(child);
1142 	pcicfgregs *cfg = &dinfo->cfg;
1143 	u_int32_t status;
1144 	u_int8_t ptr;
1145 
1146 	/*
1147 	 * Check the CAP_LIST bit of the PCI status register first.
1148 	 */
1149 	status = pci_read_config(child, PCIR_STATUS, 2);
1150 	if (!(status & PCIM_STATUS_CAPPRESENT))
1151 		return (ENXIO);
1152 
1153 	/*
1154 	 * Determine the start pointer of the capabilities list.
1155 	 */
1156 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157 	case PCIM_HDRTYPE_NORMAL:
1158 	case PCIM_HDRTYPE_BRIDGE:
1159 		ptr = PCIR_CAP_PTR;
1160 		break;
1161 	case PCIM_HDRTYPE_CARDBUS:
1162 		ptr = PCIR_CAP_PTR_2;
1163 		break;
1164 	default:
1165 		/* XXX: panic? */
1166 		return (ENXIO);		/* no extended capabilities support */
1167 	}
1168 	ptr = pci_read_config(child, ptr, 1);
1169 
1170 	/*
1171 	 * Traverse the capabilities list.
1172 	 */
1173 	while (ptr != 0) {
1174 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1175 			if (capreg != NULL)
1176 				*capreg = ptr;
1177 			return (0);
1178 		}
1179 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1180 	}
1181 
1182 	return (ENOENT);
1183 }
1184 
1185 /*
1186  * Support for MSI-X message interrupts.
1187  */
1188 void
1189 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1190 {
1191 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1192 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1193 	uint32_t offset;
1194 
1195 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1196 	offset = msix->msix_table_offset + index * 16;
1197 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1198 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1199 	bus_write_4(msix->msix_table_res, offset + 8, data);
1200 
1201 	/* Enable MSI -> HT mapping. */
1202 	pci_ht_map_msi(dev, address);
1203 }
1204 
1205 void
1206 pci_mask_msix(device_t dev, u_int index)
1207 {
1208 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1209 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1210 	uint32_t offset, val;
1211 
1212 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1213 	offset = msix->msix_table_offset + index * 16 + 12;
1214 	val = bus_read_4(msix->msix_table_res, offset);
1215 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1216 		val |= PCIM_MSIX_VCTRL_MASK;
1217 		bus_write_4(msix->msix_table_res, offset, val);
1218 	}
1219 }
1220 
1221 void
1222 pci_unmask_msix(device_t dev, u_int index)
1223 {
1224 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1225 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1226 	uint32_t offset, val;
1227 
1228 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1229 	offset = msix->msix_table_offset + index * 16 + 12;
1230 	val = bus_read_4(msix->msix_table_res, offset);
1231 	if (val & PCIM_MSIX_VCTRL_MASK) {
1232 		val &= ~PCIM_MSIX_VCTRL_MASK;
1233 		bus_write_4(msix->msix_table_res, offset, val);
1234 	}
1235 }
1236 
1237 int
1238 pci_pending_msix(device_t dev, u_int index)
1239 {
1240 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1242 	uint32_t offset, bit;
1243 
1244 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1245 	offset = msix->msix_pba_offset + (index / 32) * 4;
1246 	bit = 1 << index % 32;
1247 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1248 }
1249 
1250 /*
1251  * Restore MSI-X registers and table during resume.  If MSI-X is
1252  * enabled then walk the virtual table to restore the actual MSI-X
1253  * table.
1254  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into the vectors array. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved MSI-X control word regardless of state. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1282 
1283 /*
1284  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1285  * returned in *count.  After this function returns, each message will be
1286  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1287  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BARs holding
	 * the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* 'rle' is the table BAR here when the PBA shares it. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more messages than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Keep however many messages the bridge managed to hand out. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.  The
	 * default layout assigns the allocated IRQs consecutively to
	 * the first 'actual' table entries (mte_vector is 1-based).
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1419 
1420 /*
1421  * By default, pci_alloc_msix() will assign the allocated IRQ
1422  * resources consecutively to the first N messages in the MSI-X table.
1423  * However, device drivers may want to use different layouts if they
1424  * either receive fewer messages than they asked for, or they wish to
1425  * populate the MSI-X table sparsely.  This method allows the driver
1426  * to specify what layout it wants.  It must be called after a
1427  * successful pci_alloc_msix() but before any of the associated
1428  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1429  *
1430  * The 'vectors' array contains 'count' message vectors.  The array
1431  * maps directly to the MSI-X table in that index 0 in the array
1432  * specifies the vector for the first message in the MSI-X table, etc.
1433  * The vector value in each array index can either be 0 to indicate
1434  * that no vector should be assigned to a message slot, or it can be a
1435  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1437  * vector (IRQ) to be used for the corresponding message.
1438  *
1439  * On successful return, each message with a non-zero vector will have
1440  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1441  * 1.  Additionally, if any of the IRQs allocated via the previous
1442  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1443  * will be freed back to the system automatically.
1444  *
1445  * For example, suppose a driver has a MSI-X table with 6 messages and
1446  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1447  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1448  * C.  After the call to pci_alloc_msix(), the device will be setup to
1449  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1451  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1452  * be freed back to the system.  This device will also have valid
1453  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1454  *
1455  * In any case, the SYS_RES_IRQ rid X will always map to the message
1456  * at MSI-X table index X - 1 and will only be valid if a vector is
1457  * assigned to that table entry.
1458  */
1459 int
1460 pci_remap_msix_method(device_t dev, device_t child, int count,
1461     const u_int *vectors)
1462 {
1463 	struct pci_devinfo *dinfo = device_get_ivars(child);
1464 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465 	struct resource_list_entry *rle;
1466 	int i, irq, j, *used;
1467 
1468 	/*
1469 	 * Have to have at least one message in the table but the
1470 	 * table can't be bigger than the actual MSI-X table in the
1471 	 * device.
1472 	 */
1473 	if (count == 0 || count > msix->msix_msgnum)
1474 		return (EINVAL);
1475 
1476 	/* Sanity check the vectors. */
1477 	for (i = 0; i < count; i++)
1478 		if (vectors[i] > msix->msix_alloc)
1479 			return (EINVAL);
1480 
1481 	/*
1482 	 * Make sure there aren't any holes in the vectors to be used.
1483 	 * It's a big pain to support it, and it doesn't really make
1484 	 * sense anyway.  Also, at least one vector must be used.
1485 	 */
1486 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1487 	    M_ZERO);
1488 	for (i = 0; i < count; i++)
1489 		if (vectors[i] != 0)
1490 			used[vectors[i] - 1] = 1;
1491 	for (i = 0; i < msix->msix_alloc - 1; i++)
1492 		if (used[i] == 0 && used[i + 1] == 1) {
1493 			free(used, M_DEVBUF);
1494 			return (EINVAL);
1495 		}
1496 	if (used[0] != 1) {
1497 		free(used, M_DEVBUF);
1498 		return (EINVAL);
1499 	}
1500 
1501 	/* Make sure none of the resources are allocated. */
1502 	for (i = 0; i < msix->msix_table_len; i++) {
1503 		if (msix->msix_table[i].mte_vector == 0)
1504 			continue;
1505 		if (msix->msix_table[i].mte_handlers > 0)
1506 			return (EBUSY);
1507 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1508 		KASSERT(rle != NULL, ("missing resource"));
1509 		if (rle->res != NULL)
1510 			return (EBUSY);
1511 	}
1512 
1513 	/* Free the existing resource list entries. */
1514 	for (i = 0; i < msix->msix_table_len; i++) {
1515 		if (msix->msix_table[i].mte_vector == 0)
1516 			continue;
1517 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1518 	}
1519 
1520 	/*
1521 	 * Build the new virtual table keeping track of which vectors are
1522 	 * used.
1523 	 */
1524 	free(msix->msix_table, M_DEVBUF);
1525 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1526 	    M_DEVBUF, M_WAITOK | M_ZERO);
1527 	for (i = 0; i < count; i++)
1528 		msix->msix_table[i].mte_vector = vectors[i];
1529 	msix->msix_table_len = count;
1530 
1531 	/* Free any unused IRQs and resize the vectors array if necessary. */
1532 	j = msix->msix_alloc - 1;
1533 	if (used[j] == 0) {
1534 		struct msix_vector *vec;
1535 
1536 		while (used[j] == 0) {
1537 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1538 			    msix->msix_vectors[j].mv_irq);
1539 			j--;
1540 		}
1541 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1542 		    M_WAITOK);
1543 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1544 		    (j + 1));
1545 		free(msix->msix_vectors, M_DEVBUF);
1546 		msix->msix_vectors = vec;
1547 		msix->msix_alloc = j + 1;
1548 	}
1549 	free(used, M_DEVBUF);
1550 
1551 	/* Map the IRQs onto the rids. */
1552 	for (i = 0; i < count; i++) {
1553 		if (vectors[i] == 0)
1554 			continue;
1555 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1556 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1557 		    irq, 1);
1558 	}
1559 
1560 	if (bootverbose) {
1561 		device_printf(child, "Remapped MSI-X IRQs as: ");
1562 		for (i = 0; i < count; i++) {
1563 			if (i != 0)
1564 				printf(", ");
1565 			if (vectors[i] == 0)
1566 				printf("---");
1567 			else
1568 				printf("%d",
1569 				    msix->msix_vectors[vectors[i]].mv_irq);
1570 		}
1571 		printf("\n");
1572 	}
1573 
1574 	return (0);
1575 }
1576 
/*
 * Disable MSI-X on 'child' and release all messages, the virtual
 * table, and the IRQ resources allocated by pci_alloc_msix_method().
 * Returns ENODEV if nothing is allocated and EBUSY if any message
 * still has a handler or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1623 
1624 /*
1625  * Return the max supported MSI-X messages this device supports.
1626  * Basically, assuming the MD code can alloc messages, this function
1627  * should return the maximum value that pci_alloc_msix() can return.
1628  * Thus, it is subject to the tunables, etc.
1629  */
1630 int
1631 pci_msix_count_method(device_t dev, device_t child)
1632 {
1633 	struct pci_devinfo *dinfo = device_get_ivars(child);
1634 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635 
1636 	if (pci_do_msix && msix->msix_location != 0)
1637 		return (msix->msix_msgnum);
1638 	return (0);
1639 }
1640 
1641 /*
1642  * HyperTransport MSI mapping control
1643  */
1644 void
1645 pci_ht_map_msi(device_t dev, uint64_t addr)
1646 {
1647 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1648 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1649 
1650 	if (!ht->ht_msimap)
1651 		return;
1652 
1653 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1654 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1655 		/* Enable MSI -> HT mapping. */
1656 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1657 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1658 		    ht->ht_msictrl, 2);
1659 	}
1660 
1661 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1662 		/* Disable MSI -> HT mapping. */
1663 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1664 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1665 		    ht->ht_msictrl, 2);
1666 	}
1667 }
1668 
1669 int
1670 pci_get_max_read_req(device_t dev)
1671 {
1672 	int cap;
1673 	uint16_t val;
1674 
1675 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1676 		return (0);
1677 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1678 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1679 	val >>= 12;
1680 	return (1 << (val + 7));
1681 }
1682 
1683 int
1684 pci_set_max_read_req(device_t dev, int size)
1685 {
1686 	int cap;
1687 	uint16_t val;
1688 
1689 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1690 		return (0);
1691 	if (size < 128)
1692 		size = 128;
1693 	if (size > 4096)
1694 		size = 4096;
1695 	size = (1 << (fls(size) - 1));
1696 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1697 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1698 	val |= (fls(size) - 8) << 12;
1699 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1700 	return (size);
1701 }
1702 
1703 /*
1704  * Support for MSI message signalled interrupts.
1705  */
1706 void
1707 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1708 {
1709 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1710 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1711 
1712 	/* Write data and address values. */
1713 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1714 	    address & 0xffffffff, 4);
1715 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1716 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1717 		    address >> 32, 4);
1718 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1719 		    data, 2);
1720 	} else
1721 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1722 		    2);
1723 
1724 	/* Enable MSI in the control register. */
1725 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1726 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1727 	    2);
1728 
1729 	/* Enable MSI -> HT mapping. */
1730 	pci_ht_map_msi(dev, address);
1731 }
1732 
1733 void
1734 pci_disable_msi(device_t dev)
1735 {
1736 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1737 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1738 
1739 	/* Disable MSI -> HT mapping. */
1740 	pci_ht_map_msi(dev, 0);
1741 
1742 	/* Disable MSI in the control register. */
1743 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1744 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1745 	    2);
1746 }
1747 
1748 /*
1749  * Restore MSI registers during resume.  If MSI is enabled then
1750  * restore the data and address registers in addition to the control
1751  * register.
1752  */
1753 static void
1754 pci_resume_msi(device_t dev)
1755 {
1756 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1757 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1758 	uint64_t address;
1759 	uint16_t data;
1760 
1761 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1762 		address = msi->msi_addr;
1763 		data = msi->msi_data;
1764 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1765 		    address & 0xffffffff, 4);
1766 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1767 			pci_write_config(dev, msi->msi_location +
1768 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1769 			pci_write_config(dev, msi->msi_location +
1770 			    PCIR_MSI_DATA_64BIT, data, 2);
1771 		} else
1772 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1773 			    data, 2);
1774 	}
1775 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1776 	    2);
1777 }
1778 
/*
 * Re-program the message address/data for the MSI or MSI-X message
 * currently routed to 'irq' after the interrupt has been moved (e.g.
 * to another CPU).  Returns ENOENT if 'irq' is not one of ours.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		/* MSI rids start at 1. */
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update the cache, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* mte_vector is 1-based into the vectors. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1851 
1852 /*
1853  * Returns true if the specified device is blacklisted because MSI
1854  * doesn't work.
1855  */
1856 int
1857 pci_msi_device_blacklisted(device_t dev)
1858 {
1859 	struct pci_quirk *q;
1860 
1861 	if (!pci_honor_msi_blacklist)
1862 		return (0);
1863 
1864 	for (q = &pci_quirks[0]; q->devid; q++) {
1865 		if (q->devid == pci_get_devid(dev) &&
1866 		    q->type == PCI_QUIRK_DISABLE_MSI)
1867 			return (1);
1868 	}
1869 	return (0);
1870 }
1871 
1872 /*
1873  * Returns true if a specified chipset supports MSI when it is
1874  * emulated hardware in a virtual machine.
1875  */
1876 static int
1877 pci_msi_vm_chipset(device_t dev)
1878 {
1879 	struct pci_quirk *q;
1880 
1881 	for (q = &pci_quirks[0]; q->devid; q++) {
1882 		if (q->devid == pci_get_devid(dev) &&
1883 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1884 			return (1);
1885 	}
1886 	return (0);
1887 }
1888 
1889 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1891  * we just check for blacklisted chipsets as represented by the
1892  * host-PCI bridge at device 0:0:0.  In the future, it may become
1893  * necessary to check other system attributes, such as the kenv values
1894  * that give the motherboard manufacturer and model number.
1895  */
1896 static int
1897 pci_msi_blacklisted(void)
1898 {
1899 	device_t dev;
1900 
1901 	if (!pci_honor_msi_blacklist)
1902 		return (0);
1903 
1904 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1905 	if (!(pcie_chipset || pcix_chipset)) {
1906 		if (vm_guest != VM_GUEST_NO) {
1907 			dev = pci_find_bsf(0, 0, 0);
1908 			if (dev != NULL)
1909 				return (pci_msi_vm_chipset(dev) == 0);
1910 		}
1911 		return (1);
1912 	}
1913 
1914 	dev = pci_find_bsf(0, 0, 0);
1915 	if (dev != NULL)
1916 		return (pci_msi_device_blacklisted(dev));
1917 	return (0);
1918 }
1919 
1920 /*
1921  * Attempt to allocate *count MSI messages.  The actual number allocated is
1922  * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1924  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Retry with progressively fewer messages until one size works. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2043 
2044 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means no MSI-X was allocated. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below holds at most 32 entries. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so we can hand them back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2092 
2093 /*
2094  * Return the max supported MSI messages this device supports.
2095  * Basically, assuming the MD code can alloc messages, this function
2096  * should return the maximum value that pci_alloc_msi() can return.
2097  * Thus, it is subject to the tunables, etc.
2098  */
2099 int
2100 pci_msi_count_method(device_t dev, device_t child)
2101 {
2102 	struct pci_devinfo *dinfo = device_get_ivars(child);
2103 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2104 
2105 	if (pci_do_msi && msi->msi_location != 0)
2106 		return (msi->msi_msgnum);
2107 	return (0);
2108 }
2109 
2110 /* free pcicfgregs structure and all depending data structures */
2111 
2112 int
2113 pci_freecfg(struct pci_devinfo *dinfo)
2114 {
2115 	struct devlist *devlist_head;
2116 	struct pci_map *pm, *next;
2117 	int i;
2118 
2119 	devlist_head = &pci_devq;
2120 
2121 	if (dinfo->cfg.vpd.vpd_reg) {
2122 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2123 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2124 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2125 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2126 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2127 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2128 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2129 	}
2130 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2131 		free(pm, M_DEVBUF);
2132 	}
2133 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2134 	free(dinfo, M_DEVBUF);
2135 
2136 	/* increment the generation count */
2137 	pci_generation++;
2138 
2139 	/* we're losing one device */
2140 	pci_numdevs--;
2141 	return (0);
2142 }
2143 
2144 /*
 * PCI power management
2146  */
2147 int
2148 pci_set_powerstate_method(device_t dev, device_t child, int state)
2149 {
2150 	struct pci_devinfo *dinfo = device_get_ivars(child);
2151 	pcicfgregs *cfg = &dinfo->cfg;
2152 	uint16_t status;
2153 	int result, oldstate, highest, delay;
2154 
2155 	if (cfg->pp.pp_cap == 0)
2156 		return (EOPNOTSUPP);
2157 
2158 	/*
2159 	 * Optimize a no state change request away.  While it would be OK to
2160 	 * write to the hardware in theory, some devices have shown odd
2161 	 * behavior when going from D3 -> D3.
2162 	 */
2163 	oldstate = pci_get_powerstate(child);
2164 	if (oldstate == state)
2165 		return (0);
2166 
2167 	/*
2168 	 * The PCI power management specification states that after a state
2169 	 * transition between PCI power states, system software must
2170 	 * guarantee a minimal delay before the function accesses the device.
2171 	 * Compute the worst case delay that we need to guarantee before we
2172 	 * access the device.  Many devices will be responsive much more
2173 	 * quickly than this delay, but there are some that don't respond
2174 	 * instantly to state changes.  Transitions to/from D3 state require
2175 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2176 	 * is done below with DELAY rather than a sleeper function because
2177 	 * this function can be called from contexts where we cannot sleep.
2178 	 */
2179 	highest = (oldstate > state) ? oldstate : state;
2180 	if (highest == PCI_POWERSTATE_D3)
2181 	    delay = 10000;
2182 	else if (highest == PCI_POWERSTATE_D2)
2183 	    delay = 200;
2184 	else
2185 	    delay = 0;
2186 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2187 	    & ~PCIM_PSTAT_DMASK;
2188 	result = 0;
2189 	switch (state) {
2190 	case PCI_POWERSTATE_D0:
2191 		status |= PCIM_PSTAT_D0;
2192 		break;
2193 	case PCI_POWERSTATE_D1:
2194 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2195 			return (EOPNOTSUPP);
2196 		status |= PCIM_PSTAT_D1;
2197 		break;
2198 	case PCI_POWERSTATE_D2:
2199 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2200 			return (EOPNOTSUPP);
2201 		status |= PCIM_PSTAT_D2;
2202 		break;
2203 	case PCI_POWERSTATE_D3:
2204 		status |= PCIM_PSTAT_D3;
2205 		break;
2206 	default:
2207 		return (EINVAL);
2208 	}
2209 
2210 	if (bootverbose)
2211 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2212 		    state);
2213 
2214 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2215 	if (delay)
2216 		DELAY(delay);
2217 	return (0);
2218 }
2219 
2220 int
2221 pci_get_powerstate_method(device_t dev, device_t child)
2222 {
2223 	struct pci_devinfo *dinfo = device_get_ivars(child);
2224 	pcicfgregs *cfg = &dinfo->cfg;
2225 	uint16_t status;
2226 	int result;
2227 
2228 	if (cfg->pp.pp_cap != 0) {
2229 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2230 		switch (status & PCIM_PSTAT_DMASK) {
2231 		case PCIM_PSTAT_D0:
2232 			result = PCI_POWERSTATE_D0;
2233 			break;
2234 		case PCIM_PSTAT_D1:
2235 			result = PCI_POWERSTATE_D1;
2236 			break;
2237 		case PCIM_PSTAT_D2:
2238 			result = PCI_POWERSTATE_D2;
2239 			break;
2240 		case PCIM_PSTAT_D3:
2241 			result = PCI_POWERSTATE_D3;
2242 			break;
2243 		default:
2244 			result = PCI_POWERSTATE_UNKNOWN;
2245 			break;
2246 		}
2247 	} else {
2248 		/* No support, device is always at D0 */
2249 		result = PCI_POWERSTATE_D0;
2250 	}
2251 	return (result);
2252 }
2253 
2254 /*
2255  * Some convenience functions for PCI device drivers.
2256  */
2257 
2258 static __inline void
2259 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2260 {
2261 	uint16_t	command;
2262 
2263 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2264 	command |= bit;
2265 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2266 }
2267 
2268 static __inline void
2269 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2270 {
2271 	uint16_t	command;
2272 
2273 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2274 	command &= ~bit;
2275 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2276 }
2277 
/* Turn on the child's bus-master enable bit in its command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2284 
/* Turn off the child's bus-master enable bit in its command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2291 
2292 int
2293 pci_enable_io_method(device_t dev, device_t child, int space)
2294 {
2295 	uint16_t bit;
2296 
2297 	switch(space) {
2298 	case SYS_RES_IOPORT:
2299 		bit = PCIM_CMD_PORTEN;
2300 		break;
2301 	case SYS_RES_MEMORY:
2302 		bit = PCIM_CMD_MEMEN;
2303 		break;
2304 	default:
2305 		return (EINVAL);
2306 	}
2307 	pci_set_command_bit(dev, child, bit);
2308 	return (0);
2309 }
2310 
2311 int
2312 pci_disable_io_method(device_t dev, device_t child, int space)
2313 {
2314 	uint16_t bit;
2315 
2316 	switch(space) {
2317 	case SYS_RES_IOPORT:
2318 		bit = PCIM_CMD_PORTEN;
2319 		break;
2320 	case SYS_RES_MEMORY:
2321 		bit = PCIM_CMD_MEMEN;
2322 		break;
2323 	default:
2324 		return (EINVAL);
2325 	}
2326 	pci_clear_command_bit(dev, child, bit);
2327 	return (0);
2328 }
2329 
2330 /*
2331  * New style pci driver.  Parent device is either a pci-host-bridge or a
2332  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2333  */
2334 
/* Dump a device's config-space summary to the console when booting verbose. */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identification and bus address. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based ('a' = pin 1); 0 means no INTx pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary, including which BAR(s) hold it. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2391 
2392 static int
2393 pci_porten(device_t dev)
2394 {
2395 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2396 }
2397 
2398 static int
2399 pci_memen(device_t dev)
2400 {
2401 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2402 }
2403 
/*
 * Read a BAR's current value into *mapp and its sizing probe result
 * (the value read back after writing all 1's) into *testvalp.  Both
 * the BAR and the command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* Write all 1's except bit 0 (the ROM enable bit). */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR spans two consecutive dwords. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2467 
/*
 * Program BAR pm->pm_reg with 'base', writing both dwords for a
 * 64-bit BAR, then cache the value the hardware actually accepted in
 * pm->pm_value.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Read back what stuck, both halves for a 64-bit BAR. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev, pm->pm_reg + 4, 4) << 32;
}
2487 
2488 struct pci_map *
2489 pci_find_bar(device_t dev, int reg)
2490 {
2491 	struct pci_devinfo *dinfo;
2492 	struct pci_map *pm;
2493 
2494 	dinfo = device_get_ivars(dev);
2495 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2496 		if (pm->pm_reg == reg)
2497 			return (pm);
2498 	}
2499 	return (NULL);
2500 }
2501 
2502 int
2503 pci_bar_enabled(device_t dev, struct pci_map *pm)
2504 {
2505 	struct pci_devinfo *dinfo;
2506 	uint16_t cmd;
2507 
2508 	dinfo = device_get_ivars(dev);
2509 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2510 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2511 		return (0);
2512 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2513 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2514 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2515 	else
2516 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2517 }
2518 
/*
 * Record a new BAR for this device: register offset 'reg', raw BAR
 * contents 'value', and 'size' (log2 of the length, as produced by
 * pci_mapsize()).  The list is kept ordered by register offset.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry we should insert after (last one with smaller reg). */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		/* Empty list: the new entry becomes the only element. */
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2543 
2544 static void
2545 pci_restore_bars(device_t dev)
2546 {
2547 	struct pci_devinfo *dinfo;
2548 	struct pci_map *pm;
2549 	int ln2range;
2550 
2551 	dinfo = device_get_ivars(dev);
2552 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2553 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2554 			ln2range = 32;
2555 		else
2556 			ln2range = pci_maprange(pm->pm_value);
2557 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2558 		if (ln2range == 64)
2559 			pci_write_config(dev, pm->pm_reg + 4,
2560 			    pm->pm_value >> 32, 4);
2561 	}
2562 }
2563 
2564 /*
2565  * Add a resource based on a pci map register. Return 1 if the map
2566  * register is a 32bit map register or 2 if it is a 64bit register.
2567  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR length in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable buses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject BARs whose address does not fit in a u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* A BAR still holding its sizing value has no usable address. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	/* Program the BAR with the address actually reserved (or 0). */
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2712 
2713 /*
2714  * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
 * same addresses that old ISA hardware did. This dictates that we use
 * those addresses and ignore the BARs if we cannot set PCI native
2718  * addressing mode.
2719  */
2720 static void
2721 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2722     uint32_t prefetchmask)
2723 {
2724 	struct resource *r;
2725 	int rid, type, progif;
2726 #if 0
2727 	/* if this device supports PCI native addressing use it */
2728 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2729 	if ((progif & 0x8a) == 0x8a) {
2730 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2731 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2732 			printf("Trying ATA native PCI addressing mode\n");
2733 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2734 		}
2735 	}
2736 #endif
2737 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2738 	type = SYS_RES_IOPORT;
2739 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2740 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2741 		    prefetchmask & (1 << 0));
2742 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2743 		    prefetchmask & (1 << 1));
2744 	} else {
2745 		rid = PCIR_BAR(0);
2746 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2747 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2748 		    0x1f7, 8, 0);
2749 		rid = PCIR_BAR(1);
2750 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2751 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2752 		    0x3f6, 1, 0);
2753 	}
2754 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2755 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2756 		    prefetchmask & (1 << 2));
2757 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2758 		    prefetchmask & (1 << 3));
2759 	} else {
2760 		rid = PCIR_BAR(2);
2761 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2762 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2763 		    0x177, 8, 0);
2764 		rid = PCIR_BAR(3);
2765 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2766 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2767 		    0x376, 1, 0);
2768 	}
2769 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2770 	    prefetchmask & (1 << 4));
2771 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2772 	    prefetchmask & (1 << 5));
2773 }
2774 
/*
 * Work out a legacy INTx IRQ for 'dev' and record it as the rid 0
 * SYS_RES_IRQ resource.  A hw.pci<dom>.<bus>.<slot>.INT<pin>.irq
 * tunable overrides everything; otherwise the intline register and
 * PCI_ASSIGN_INTERRUPT() are consulted in an order that depends on
 * force_route.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (only 1..254 are accepted). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2822 
2823 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the OHCI registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means SMM currently owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request an ownership change and poll up to 100 x 1ms. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			/* SMM never let go; force the controller into reset. */
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2859 
2860 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Zero the UHCI interrupt register. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2883 
2884 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the EHCI registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk
	 * the extended capability chain in config space looking for
	 * the legacy support capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A non-zero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim OS ownership and poll up to 100 x 1ms for handoff. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2939 
/*
 * Discover and reserve the BAR and interrupt resources of a newly
 * found child device, applying per-device quirks and, when enabled,
 * early takeover of USB controllers from SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 depending on BAR width. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Optionally wrest USB controllers from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2994 
2995 void
2996 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2997 {
2998 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2999 	device_t pcib = device_get_parent(dev);
3000 	struct pci_devinfo *dinfo;
3001 	int maxslots;
3002 	int s, f, pcifunchigh;
3003 	uint8_t hdrtype;
3004 
3005 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3006 	    ("dinfo_size too small"));
3007 	maxslots = PCIB_MAXSLOTS(pcib);
3008 	for (s = 0; s <= maxslots; s++) {
3009 		pcifunchigh = 0;
3010 		f = 0;
3011 		DELAY(1);
3012 		hdrtype = REG(PCIR_HDRTYPE, 1);
3013 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3014 			continue;
3015 		if (hdrtype & PCIM_MFDEV)
3016 			pcifunchigh = PCI_FUNCMAX;
3017 		for (f = 0; f <= pcifunchigh; f++) {
3018 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3019 			    dinfo_size);
3020 			if (dinfo != NULL) {
3021 				pci_add_child(dev, dinfo);
3022 			}
3023 		}
3024 	}
3025 #undef REG
3026 }
3027 
3028 void
3029 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3030 {
3031 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3032 	device_set_ivars(dinfo->cfg.dev, dinfo);
3033 	resource_list_init(&dinfo->resources);
3034 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3035 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3036 	pci_print_verbose(dinfo);
3037 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3038 }
3039 
3040 static int
3041 pci_probe(device_t dev)
3042 {
3043 
3044 	device_set_desc(dev, "PCI bus");
3045 
3046 	/* Allow other subclasses to override this driver. */
3047 	return (BUS_PROBE_GENERIC);
3048 }
3049 
3050 static int
3051 pci_attach(device_t dev)
3052 {
3053 	int busno, domain;
3054 
3055 	/*
3056 	 * Since there can be multiple independantly numbered PCI
3057 	 * busses on systems with multiple PCI domains, we can't use
3058 	 * the unit number to decide which bus we are probing. We ask
3059 	 * the parent pcib what our domain and bus numbers are.
3060 	 */
3061 	domain = pcib_get_domain(dev);
3062 	busno = pcib_get_bus(dev);
3063 	if (bootverbose)
3064 		device_printf(dev, "domain=%d, physical bus=%d\n",
3065 		    domain, busno);
3066 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3067 	return (bus_generic_attach(dev));
3068 }
3069 
3070 static void
3071 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3072     int state)
3073 {
3074 	device_t child, pcib;
3075 	struct pci_devinfo *dinfo;
3076 	int dstate, i;
3077 
3078 	/*
3079 	 * Set the device to the given state.  If the firmware suggests
3080 	 * a different power state, use it instead.  If power management
3081 	 * is not present, the firmware is responsible for managing
3082 	 * device power.  Skip children who aren't attached since they
3083 	 * are handled separately.
3084 	 */
3085 	pcib = device_get_parent(dev);
3086 	for (i = 0; i < numdevs; i++) {
3087 		child = devlist[i];
3088 		dinfo = device_get_ivars(child);
3089 		dstate = state;
3090 		if (device_is_attached(child) &&
3091 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3092 			pci_set_powerstate(child, dstate);
3093 	}
3094 }
3095 
3096 int
3097 pci_suspend(device_t dev)
3098 {
3099 	device_t child, *devlist;
3100 	struct pci_devinfo *dinfo;
3101 	int error, i, numdevs;
3102 
3103 	/*
3104 	 * Save the PCI configuration space for each child and set the
3105 	 * device in the appropriate power state for this sleep state.
3106 	 */
3107 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3108 		return (error);
3109 	for (i = 0; i < numdevs; i++) {
3110 		child = devlist[i];
3111 		dinfo = device_get_ivars(child);
3112 		pci_cfg_save(child, dinfo, 0);
3113 	}
3114 
3115 	/* Suspend devices before potentially powering them down. */
3116 	error = bus_generic_suspend(dev);
3117 	if (error) {
3118 		free(devlist, M_TEMP);
3119 		return (error);
3120 	}
3121 	if (pci_do_power_suspend)
3122 		pci_set_power_children(dev, devlist, numdevs,
3123 		    PCI_POWERSTATE_D3);
3124 	free(devlist, M_TEMP);
3125 	return (0);
3126 }
3127 
3128 int
3129 pci_resume(device_t dev)
3130 {
3131 	device_t child, *devlist;
3132 	struct pci_devinfo *dinfo;
3133 	int error, i, numdevs;
3134 
3135 	/*
3136 	 * Set each child to D0 and restore its PCI configuration space.
3137 	 */
3138 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3139 		return (error);
3140 	if (pci_do_power_resume)
3141 		pci_set_power_children(dev, devlist, numdevs,
3142 		    PCI_POWERSTATE_D0);
3143 
3144 	/* Now the device is powered up, restore its config space. */
3145 	for (i = 0; i < numdevs; i++) {
3146 		child = devlist[i];
3147 		dinfo = device_get_ivars(child);
3148 
3149 		pci_cfg_restore(child, dinfo);
3150 		if (!device_is_attached(child))
3151 			pci_cfg_save(child, dinfo, 1);
3152 	}
3153 
3154 	/*
3155 	 * Resume critical devices first, then everything else later.
3156 	 */
3157 	for (i = 0; i < numdevs; i++) {
3158 		child = devlist[i];
3159 		switch (pci_get_class(child)) {
3160 		case PCIC_DISPLAY:
3161 		case PCIC_MEMORY:
3162 		case PCIC_BRIDGE:
3163 		case PCIC_BASEPERIPH:
3164 			DEVICE_RESUME(child);
3165 			break;
3166 		}
3167 	}
3168 	for (i = 0; i < numdevs; i++) {
3169 		child = devlist[i];
3170 		switch (pci_get_class(child)) {
3171 		case PCIC_DISPLAY:
3172 		case PCIC_MEMORY:
3173 		case PCIC_BRIDGE:
3174 		case PCIC_BASEPERIPH:
3175 			break;
3176 		default:
3177 			DEVICE_RESUME(child);
3178 		}
3179 	}
3180 	free(devlist, M_TEMP);
3181 	return (0);
3182 }
3183 
3184 static void
3185 pci_load_vendor_data(void)
3186 {
3187 	caddr_t data;
3188 	void *ptr;
3189 	size_t sz;
3190 
3191 	data = preload_search_by_type("pci_vendor_data");
3192 	if (data != NULL) {
3193 		ptr = preload_fetch_addr(data);
3194 		sz = preload_fetch_size(data);
3195 		if (ptr != NULL && sz != 0) {
3196 			pci_vendordata = ptr;
3197 			pci_vendordata_size = sz;
3198 			/* terminate the database */
3199 			pci_vendordata[pci_vendordata_size] = '\n';
3200 		}
3201 	}
3202 }
3203 
3204 void
3205 pci_driver_added(device_t dev, driver_t *driver)
3206 {
3207 	int numdevs;
3208 	device_t *devlist;
3209 	device_t child;
3210 	struct pci_devinfo *dinfo;
3211 	int i;
3212 
3213 	if (bootverbose)
3214 		device_printf(dev, "driver added\n");
3215 	DEVICE_IDENTIFY(driver, dev);
3216 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3217 		return;
3218 	for (i = 0; i < numdevs; i++) {
3219 		child = devlist[i];
3220 		if (device_get_state(child) != DS_NOTPRESENT)
3221 			continue;
3222 		dinfo = device_get_ivars(child);
3223 		pci_print_verbose(dinfo);
3224 		if (bootverbose)
3225 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3226 		pci_cfg_restore(child, dinfo);
3227 		if (device_probe_and_attach(child) != 0)
3228 			pci_cfg_save(child, dinfo, 1);
3229 	}
3230 	free(devlist, M_TEMP);
3231 }
3232 
/*
 * Bus method: set up an interrupt handler for a child.  For direct
 * children this also manages the INTx-disable command bit and lazily
 * programs MSI/MSI-X address and data registers on the first handler
 * for a given message.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* rid 0 is the legacy interrupt: make sure INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Only program the message registers once. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * NOTE: "bad" is also reached on the success path with
		 * error == 0, so the teardown below only runs on failure.
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3324 
/*
 * Bus method: tear down an interrupt handler for a child.  For direct
 * children this masks INTx (rid 0) or decrements the MSI/MSI-X handler
 * count, disabling or masking the message when the count hits zero.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is dereferenced without a NULL
		 * check; presumably any active rid > 0 always has a
		 * resource list entry -- confirm against the
		 * allocation path.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last handler gone: turn MSI off entirely. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler gone: mask this MSI-X message. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3383 
3384 int
3385 pci_print_child(device_t dev, device_t child)
3386 {
3387 	struct pci_devinfo *dinfo;
3388 	struct resource_list *rl;
3389 	int retval = 0;
3390 
3391 	dinfo = device_get_ivars(child);
3392 	rl = &dinfo->resources;
3393 
3394 	retval += bus_print_child_header(dev, child);
3395 
3396 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3397 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3398 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3399 	if (device_get_flags(dev))
3400 		retval += printf(" flags %#x", device_get_flags(dev));
3401 
3402 	retval += printf(" at device %d.%d", pci_get_slot(child),
3403 	    pci_get_function(child));
3404 
3405 	retval += bus_print_child_footer(dev, child);
3406 
3407 	return (retval);
3408 }
3409 
/*
 * Class/subclass to human-readable description table, used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for a device.  A subclass of -1 carries the generic
 * description for the whole class.  Terminated by an all-zero entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3501 
3502 void
3503 pci_probe_nomatch(device_t dev, device_t child)
3504 {
3505 	int	i;
3506 	char	*cp, *scp, *device;
3507 
3508 	/*
3509 	 * Look for a listing for this device in a loaded device database.
3510 	 */
3511 	if ((device = pci_describe_device(child)) != NULL) {
3512 		device_printf(dev, "<%s>", device);
3513 		free(device, M_DEVBUF);
3514 	} else {
3515 		/*
3516 		 * Scan the class/subclass descriptions for a general
3517 		 * description.
3518 		 */
3519 		cp = "unknown";
3520 		scp = NULL;
3521 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3522 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3523 				if (pci_nomatch_tab[i].subclass == -1) {
3524 					cp = pci_nomatch_tab[i].desc;
3525 				} else if (pci_nomatch_tab[i].subclass ==
3526 				    pci_get_subclass(child)) {
3527 					scp = pci_nomatch_tab[i].desc;
3528 				}
3529 			}
3530 		}
3531 		device_printf(dev, "<%s%s%s>",
3532 		    cp ? cp : "",
3533 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3534 		    scp ? scp : "");
3535 	}
3536 	printf(" at device %d.%d (no driver attached)\n",
3537 	    pci_get_slot(child), pci_get_function(child));
3538 	pci_cfg_save(child, device_get_ivars(child), 1);
3539 	return;
3540 }
3541 
3542 /*
3543  * Parse the PCI device database, if loaded, and return a pointer to a
3544  * description of the device.
3545  *
3546  * The database is flat text formatted as follows:
3547  *
3548  * Any line not in a valid format is ignored.
3549  * Lines are terminated with newline '\n' characters.
3550  *
3551  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3552  * the vendor name.
3553  *
3554  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3555  * - devices cannot be listed without a corresponding VENDOR line.
3556  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3557  * another TAB, then the device name.
3558  */
3559 
3560 /*
3561  * Assuming (ptr) points to the beginning of a line in the database,
3562  * return the vendor or device and description of the next entry.
3563  * The value of (vendor) or (device) inappropriate for the entry type
3564  * is set to -1.  Returns nonzero at the end of the database.
3565  *
 * Note that this is not fully robust against corrupt data; we try to
 * safeguard against it by appending a terminating newline to the end
 * of the database when we initialize it.
3569  */
3570 static int
3571 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3572 {
3573 	char	*cp = *ptr;
3574 	int	left;
3575 
3576 	*device = -1;
3577 	*vendor = -1;
3578 	**desc = '\0';
3579 	for (;;) {
3580 		left = pci_vendordata_size - (cp - pci_vendordata);
3581 		if (left <= 0) {
3582 			*ptr = cp;
3583 			return(1);
3584 		}
3585 
3586 		/* vendor entry? */
3587 		if (*cp != '\t' &&
3588 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3589 			break;
3590 		/* device entry? */
3591 		if (*cp == '\t' &&
3592 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3593 			break;
3594 
3595 		/* skip to next line */
3596 		while (*cp != '\n' && left > 0) {
3597 			cp++;
3598 			left--;
3599 		}
3600 		if (*cp == '\n') {
3601 			cp++;
3602 			left--;
3603 		}
3604 	}
3605 	/* skip to next line */
3606 	while (*cp != '\n' && left > 0) {
3607 		cp++;
3608 		left--;
3609 	}
3610 	if (*cp == '\n' && left > 0)
3611 		cp++;
3612 	*ptr = cp;
3613 	return(0);
3614 }
3615 
/*
 * Look up (dev) in the loaded vendor database and return a malloc'ed
 * "vendor, device" description string, or NULL if no database is
 * loaded, an allocation fails, or the vendor has no entry.  The caller
 * must free the returned string with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description text. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan this vendor's device entries; a new vendor line
	 * (vendor != -1) or end of database ends the search.
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Known vendor, unknown device: show the raw device id. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3668 
3669 int
3670 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3671 {
3672 	struct pci_devinfo *dinfo;
3673 	pcicfgregs *cfg;
3674 
3675 	dinfo = device_get_ivars(child);
3676 	cfg = &dinfo->cfg;
3677 
3678 	switch (which) {
3679 	case PCI_IVAR_ETHADDR:
3680 		/*
3681 		 * The generic accessor doesn't deal with failure, so
3682 		 * we set the return value, then return an error.
3683 		 */
3684 		*((uint8_t **) result) = NULL;
3685 		return (EINVAL);
3686 	case PCI_IVAR_SUBVENDOR:
3687 		*result = cfg->subvendor;
3688 		break;
3689 	case PCI_IVAR_SUBDEVICE:
3690 		*result = cfg->subdevice;
3691 		break;
3692 	case PCI_IVAR_VENDOR:
3693 		*result = cfg->vendor;
3694 		break;
3695 	case PCI_IVAR_DEVICE:
3696 		*result = cfg->device;
3697 		break;
3698 	case PCI_IVAR_DEVID:
3699 		*result = (cfg->device << 16) | cfg->vendor;
3700 		break;
3701 	case PCI_IVAR_CLASS:
3702 		*result = cfg->baseclass;
3703 		break;
3704 	case PCI_IVAR_SUBCLASS:
3705 		*result = cfg->subclass;
3706 		break;
3707 	case PCI_IVAR_PROGIF:
3708 		*result = cfg->progif;
3709 		break;
3710 	case PCI_IVAR_REVID:
3711 		*result = cfg->revid;
3712 		break;
3713 	case PCI_IVAR_INTPIN:
3714 		*result = cfg->intpin;
3715 		break;
3716 	case PCI_IVAR_IRQ:
3717 		*result = cfg->intline;
3718 		break;
3719 	case PCI_IVAR_DOMAIN:
3720 		*result = cfg->domain;
3721 		break;
3722 	case PCI_IVAR_BUS:
3723 		*result = cfg->bus;
3724 		break;
3725 	case PCI_IVAR_SLOT:
3726 		*result = cfg->slot;
3727 		break;
3728 	case PCI_IVAR_FUNCTION:
3729 		*result = cfg->func;
3730 		break;
3731 	case PCI_IVAR_CMDREG:
3732 		*result = cfg->cmdreg;
3733 		break;
3734 	case PCI_IVAR_CACHELNSZ:
3735 		*result = cfg->cachelnsz;
3736 		break;
3737 	case PCI_IVAR_MINGNT:
3738 		*result = cfg->mingnt;
3739 		break;
3740 	case PCI_IVAR_MAXLAT:
3741 		*result = cfg->maxlat;
3742 		break;
3743 	case PCI_IVAR_LATTIMER:
3744 		*result = cfg->lattimer;
3745 		break;
3746 	default:
3747 		return (ENOENT);
3748 	}
3749 	return (0);
3750 }
3751 
3752 int
3753 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3754 {
3755 	struct pci_devinfo *dinfo;
3756 
3757 	dinfo = device_get_ivars(child);
3758 
3759 	switch (which) {
3760 	case PCI_IVAR_INTPIN:
3761 		dinfo->cfg.intpin = value;
3762 		return (0);
3763 	case PCI_IVAR_ETHADDR:
3764 	case PCI_IVAR_SUBVENDOR:
3765 	case PCI_IVAR_SUBDEVICE:
3766 	case PCI_IVAR_VENDOR:
3767 	case PCI_IVAR_DEVICE:
3768 	case PCI_IVAR_DEVID:
3769 	case PCI_IVAR_CLASS:
3770 	case PCI_IVAR_SUBCLASS:
3771 	case PCI_IVAR_PROGIF:
3772 	case PCI_IVAR_REVID:
3773 	case PCI_IVAR_IRQ:
3774 	case PCI_IVAR_DOMAIN:
3775 	case PCI_IVAR_BUS:
3776 	case PCI_IVAR_SLOT:
3777 	case PCI_IVAR_FUNCTION:
3778 		return (EINVAL);	/* disallow for now */
3779 
3780 	default:
3781 		return (ENOENT);
3782 	}
3783 }
3784 
3785 
3786 #include "opt_ddb.h"
3787 #ifdef DDB
3788 #include <ddb/ddb.h>
3789 #include <sys/cons.h>
3790 
3791 /*
3792  * List resources based on pci map registers, used for within ddb
3793  */
3794 
3795 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3796 {
3797 	struct pci_devinfo *dinfo;
3798 	struct devlist *devlist_head;
3799 	struct pci_conf *p;
3800 	const char *name;
3801 	int i, error, none_count;
3802 
3803 	none_count = 0;
3804 	/* get the head of the device queue */
3805 	devlist_head = &pci_devq;
3806 
3807 	/*
3808 	 * Go through the list of devices and print out devices
3809 	 */
3810 	for (error = 0, i = 0,
3811 	     dinfo = STAILQ_FIRST(devlist_head);
3812 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3813 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3814 
3815 		/* Populate pd_name and pd_unit */
3816 		name = NULL;
3817 		if (dinfo->cfg.dev)
3818 			name = device_get_name(dinfo->cfg.dev);
3819 
3820 		p = &dinfo->conf;
3821 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3822 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3823 			(name && *name) ? name : "none",
3824 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3825 			none_count++,
3826 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3827 			p->pc_sel.pc_func, (p->pc_class << 16) |
3828 			(p->pc_subclass << 8) | p->pc_progif,
3829 			(p->pc_subdevice << 16) | p->pc_subvendor,
3830 			(p->pc_device << 16) | p->pc_vendor,
3831 			p->pc_revid, p->pc_hdr);
3832 	}
3833 }
3834 #endif /* DDB */
3835 
/*
 * Lazily reserve the resource backing a BAR: size the BAR, allocate a
 * matching resource from the parent, record it in the child's resource
 * list as RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were actually given. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3941 
3942 
/*
 * Bus method: allocate a resource for a child.  For direct children
 * this performs lazy interrupt routing and lazy BAR reservation before
 * handing the request to the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren are passed straight up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* All cases fall through to the resource list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4013 
4014 int
4015 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4016     struct resource *r)
4017 {
4018 	struct pci_devinfo *dinfo;
4019 	int error;
4020 
4021 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4022 	if (error)
4023 		return (error);
4024 
4025 	/* Enable decoding in the command register when activating BARs. */
4026 	if (device_get_parent(child) == dev) {
4027 		/* Device ROMs need their decoding explicitly enabled. */
4028 		dinfo = device_get_ivars(child);
4029 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4030 			pci_write_bar(child, pci_find_bar(child, rid),
4031 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4032 		switch (type) {
4033 		case SYS_RES_IOPORT:
4034 		case SYS_RES_MEMORY:
4035 			error = PCI_ENABLE_IO(dev, child, type);
4036 			break;
4037 		}
4038 	}
4039 	return (error);
4040 }
4041 
4042 int
4043 pci_deactivate_resource(device_t dev, device_t child, int type,
4044     int rid, struct resource *r)
4045 {
4046 	struct pci_devinfo *dinfo;
4047 	int error;
4048 
4049 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4050 	if (error)
4051 		return (error);
4052 
4053 	/* Disable decoding for device ROMs. */
4054 	if (device_get_parent(child) == dev) {
4055 		dinfo = device_get_ivars(child);
4056 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4057 			pci_write_bar(child, pci_find_bar(child, rid),
4058 			    rman_get_start(r));
4059 	}
4060 	return (0);
4061 }
4062 
/*
 * Detach and destroy a child device, releasing every resource that was
 * reserved on its behalf.  Memory and I/O decoding are disabled in the
 * command register before the resources are freed so the device stops
 * claiming the addresses being released.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource that is still active or busy should
			 * have been released by the child's detach; warn
			 * and forcibly release it so the unreserve below
			 * can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4102 
/*
 * Remove one entry from a child's resource list, first releasing any
 * backing resource that was reserved for it.  An active or busy
 * resource is left alone (with a warning) since the child still owns
 * it.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our own immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4145 
4146 struct resource_list *
4147 pci_get_resource_list (device_t dev, device_t child)
4148 {
4149 	struct pci_devinfo *dinfo = device_get_ivars(child);
4150 
4151 	return (&dinfo->resources);
4152 }
4153 
4154 uint32_t
4155 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4156 {
4157 	struct pci_devinfo *dinfo = device_get_ivars(child);
4158 	pcicfgregs *cfg = &dinfo->cfg;
4159 
4160 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4161 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4162 }
4163 
4164 void
4165 pci_write_config_method(device_t dev, device_t child, int reg,
4166     uint32_t val, int width)
4167 {
4168 	struct pci_devinfo *dinfo = device_get_ivars(child);
4169 	pcicfgregs *cfg = &dinfo->cfg;
4170 
4171 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4172 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4173 }
4174 
4175 int
4176 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4177     size_t buflen)
4178 {
4179 
4180 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4181 	    pci_get_function(child));
4182 	return (0);
4183 }
4184 
4185 int
4186 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4187     size_t buflen)
4188 {
4189 	struct pci_devinfo *dinfo;
4190 	pcicfgregs *cfg;
4191 
4192 	dinfo = device_get_ivars(child);
4193 	cfg = &dinfo->cfg;
4194 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4195 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4196 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4197 	    cfg->progif);
4198 	return (0);
4199 }
4200 
4201 int
4202 pci_assign_interrupt_method(device_t dev, device_t child)
4203 {
4204 	struct pci_devinfo *dinfo = device_get_ivars(child);
4205 	pcicfgregs *cfg = &dinfo->cfg;
4206 
4207 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4208 	    cfg->intpin));
4209 }
4210 
4211 static int
4212 pci_modevent(module_t mod, int what, void *arg)
4213 {
4214 	static struct cdev *pci_cdev;
4215 
4216 	switch (what) {
4217 	case MOD_LOAD:
4218 		STAILQ_INIT(&pci_devq);
4219 		pci_generation = 0;
4220 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4221 		    "pci");
4222 		pci_load_vendor_data();
4223 		break;
4224 
4225 	case MOD_UNLOAD:
4226 		destroy_dev(pci_cdev);
4227 		break;
4228 	}
4229 
4230 	return (0);
4231 }
4232 
/*
 * Restore the configuration registers previously saved by
 * pci_cfg_save() after a device has been powered down or suspended.
 * Applies only to type 0 (normal) headers; bridges and cardbus
 * devices are handled elsewhere.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* BARs first, then the rest of the saved header registers. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4274 
/*
 * Save the writable portion of a device's type 0 config header into
 * dinfo so pci_cfg_restore() can put it back later, and, when
 * setstate is non-zero, optionally place the device in the D3 power
 * state subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4354