xref: /freebsd/sys/dev/pci/pci.c (revision b7c60aadbbd5c846a250c05791fe7406d6d78bf4)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 static void		pci_load_vendor_data(void);
96 static int		pci_describe_parse_line(char **ptr, int *vendor,
97 			    int *device, char **desc);
98 static char		*pci_describe_device(device_t dev);
99 static int		pci_modevent(module_t mod, int what, void *arg);
100 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101 			    pcicfgregs *cfg);
102 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t *data);
105 #if 0
106 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t data);
108 #endif
109 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110 static void		pci_disable_msi(device_t dev);
111 static void		pci_enable_msi(device_t dev, uint64_t address,
112 			    uint16_t data);
113 static void		pci_enable_msix(device_t dev, u_int index,
114 			    uint64_t address, uint32_t data);
115 static void		pci_mask_msix(device_t dev, u_int index);
116 static void		pci_unmask_msix(device_t dev, u_int index);
117 static int		pci_msi_blacklisted(void);
118 static void		pci_resume_msi(device_t dev);
119 static void		pci_resume_msix(device_t dev);
120 static int		pci_remap_intr_method(device_t bus, device_t dev,
121 			    u_int irq);
122 
/*
 * Method table for the pci bus driver: newbus device and bus interface
 * entry points plus the PCI-specific kobj interface methods.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
176 
177 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
178 
179 static devclass_t pci_devclass;
180 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
181 MODULE_VERSION(pci, 1);
182 
183 static char	*pci_vendordata;
184 static size_t	pci_vendordata_size;
185 
186 
/*
 * Per-device workaround table, matched on the combined device/vendor
 * ID word as read from config space (device in the high 16 bits,
 * vendor in the low 16 bits).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-type specific argument (e.g. register offset) */
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* terminator */
};
237 
238 /* map register information */
239 #define	PCI_MAPMEM	0x01	/* memory map */
240 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
241 #define	PCI_MAPPORT	0x04	/* port map */
242 
243 struct devlist pci_devq;
244 uint32_t pci_generation;
245 uint32_t pci_numdevs = 0;
246 static int pcie_chipset, pcix_chipset;
247 
248 /* sysctl vars */
249 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
250 
251 static int pci_enable_io_modes = 1;
252 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
253 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
254     &pci_enable_io_modes, 1,
255     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
256 enable these bits correctly.  We'd like to do this all the time, but there\n\
257 are some peripherals that this causes problems with.");
258 
259 static int pci_do_power_nodriver = 0;
260 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
261 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
262     &pci_do_power_nodriver, 0,
263   "Place a function into D3 state when no driver attaches to it.  0 means\n\
264 disable.  1 means conservatively place devices into D3 state.  2 means\n\
265 agressively place devices into D3 state.  3 means put absolutely everything\n\
266 in D3 state.");
267 
268 int pci_do_power_resume = 1;
269 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
270 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
271     &pci_do_power_resume, 1,
272   "Transition from D3 -> D0 on resume.");
273 
274 int pci_do_power_suspend = 1;
275 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
276 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
277     &pci_do_power_suspend, 1,
278   "Transition from D0 -> D3 on suspend.");
279 
280 static int pci_do_msi = 1;
281 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
282 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
283     "Enable support for MSI interrupts");
284 
285 static int pci_do_msix = 1;
286 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
287 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
288     "Enable support for MSI-X interrupts");
289 
290 static int pci_honor_msi_blacklist = 1;
291 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
292 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
293     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
294 
295 #if defined(__i386__) || defined(__amd64__)
296 static int pci_usb_takeover = 1;
297 #else
298 static int pci_usb_takeover = 0;
299 #endif
300 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
301 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
302     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
303 Disable this if you depend on BIOS emulation of USB devices, that is\n\
304 you use USB devices (like keyboard or mouse) but do not load USB drivers");
305 
306 /* Find a device_t by bus/slot/function in domain 0 */
307 
308 device_t
309 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
310 {
311 
312 	return (pci_find_dbsf(0, bus, slot, func));
313 }
314 
315 /* Find a device_t by domain/bus/slot/function */
316 
317 device_t
318 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
319 {
320 	struct pci_devinfo *dinfo;
321 
322 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
323 		if ((dinfo->cfg.domain == domain) &&
324 		    (dinfo->cfg.bus == bus) &&
325 		    (dinfo->cfg.slot == slot) &&
326 		    (dinfo->cfg.func == func)) {
327 			return (dinfo->cfg.dev);
328 		}
329 	}
330 
331 	return (NULL);
332 }
333 
334 /* Find a device_t by vendor/device ID */
335 
336 device_t
337 pci_find_device(uint16_t vendor, uint16_t device)
338 {
339 	struct pci_devinfo *dinfo;
340 
341 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
342 		if ((dinfo->cfg.vendor == vendor) &&
343 		    (dinfo->cfg.device == device)) {
344 			return (dinfo->cfg.dev);
345 		}
346 	}
347 
348 	return (NULL);
349 }
350 
351 device_t
352 pci_find_class(uint8_t class, uint8_t subclass)
353 {
354 	struct pci_devinfo *dinfo;
355 
356 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
357 		if (dinfo->cfg.baseclass == class &&
358 		    dinfo->cfg.subclass == subclass) {
359 			return (dinfo->cfg.dev);
360 		}
361 	}
362 
363 	return (NULL);
364 }
365 
366 static int
367 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
368 {
369 	va_list ap;
370 	int retval;
371 
372 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
373 	    cfg->func);
374 	va_start(ap, fmt);
375 	retval += vprintf(fmt, ap);
376 	va_end(ap);
377 	return (retval);
378 }
379 
380 /* return base address of memory or port map */
381 
382 static pci_addr_t
383 pci_mapbase(uint64_t mapreg)
384 {
385 
386 	if (PCI_BAR_MEM(mapreg))
387 		return (mapreg & PCIM_BAR_MEM_BASE);
388 	else
389 		return (mapreg & PCIM_BAR_IO_BASE);
390 }
391 
392 /* return map type of memory or port map */
393 
394 static const char *
395 pci_maptype(uint64_t mapreg)
396 {
397 
398 	if (PCI_BAR_IO(mapreg))
399 		return ("I/O Port");
400 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
401 		return ("Prefetchable Memory");
402 	return ("Memory");
403 }
404 
405 /* return log2 of map size decoded for memory or port map */
406 
407 static int
408 pci_mapsize(uint64_t testval)
409 {
410 	int ln2size;
411 
412 	testval = pci_mapbase(testval);
413 	ln2size = 0;
414 	if (testval != 0) {
415 		while ((testval & 1) == 0)
416 		{
417 			ln2size++;
418 			testval >>= 1;
419 		}
420 	}
421 	return (ln2size);
422 }
423 
424 /* return base address of device ROM */
425 
426 static pci_addr_t
427 pci_rombase(uint64_t mapreg)
428 {
429 
430 	return (mapreg & PCIM_BIOS_ADDR_MASK);
431 }
432 
433 /* return log2 of map size decided for device ROM */
434 
435 static int
436 pci_romsize(uint64_t testval)
437 {
438 	int ln2size;
439 
440 	testval = pci_rombase(testval);
441 	ln2size = 0;
442 	if (testval != 0) {
443 		while ((testval & 1) == 0)
444 		{
445 			ln2size++;
446 			testval >>= 1;
447 		}
448 	}
449 	return (ln2size);
450 }
451 
452 /* return log2 of address range supported by map register */
453 
454 static int
455 pci_maprange(uint64_t mapreg)
456 {
457 	int ln2range = 0;
458 
459 	if (PCI_BAR_IO(mapreg))
460 		ln2range = 32;
461 	else
462 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
463 		case PCIM_BAR_MEM_32:
464 			ln2range = 32;
465 			break;
466 		case PCIM_BAR_MEM_1MB:
467 			ln2range = 20;
468 			break;
469 		case PCIM_BAR_MEM_64:
470 			ln2range = 64;
471 			break;
472 		}
473 	return (ln2range);
474 }
475 
/*
 * Adjust some values from PCI 1.0 devices to match 2.0 standards:
 * old bridges may report a type-0 header, so force the bridge header
 * type on anything whose class says PCI-PCI bridge.
 */
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 headers can be mislabelled this way. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
488 
/*
 * Extract header type specific config data: the subvendor/subdevice
 * IDs (whose register offsets differ by header type) and the number
 * of BARs the header provides.  Type-1 (bridge) headers carry no
 * subvendor registers here, so only nummaps is set for them.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
512 
/*
 * Read the configuration header of the function at domain/bus/slot/func
 * into a freshly allocated pci_devinfo of 'size' bytes ('size' lets
 * callers embed pci_devinfo at the head of a larger structure).  The
 * new entry is appended to the global pci_devq list and its pci_conf
 * summary is filled in.  Returns NULL if no device responds at that
 * address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means nothing is present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the data into the pciconf(8)-visible summary. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
588 
/*
 * Walk the device's PCI capability list and record the location and
 * relevant register contents of each capability this driver cares
 * about (power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI-express) into 'cfg'.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's offset depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
746 
747 /*
748  * PCI Vital Product Data
749  */
750 
751 #define	PCI_VPD_TIMEOUT		1000000
752 
753 static int
754 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
755 {
756 	int count = PCI_VPD_TIMEOUT;
757 
758 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
759 
760 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
761 
762 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
763 		if (--count < 0)
764 			return (ENXIO);
765 		DELAY(1);	/* limit looping */
766 	}
767 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
768 
769 	return (0);
770 }
771 
#if 0
/*
 * Write one aligned 32-bit word of VPD data at VPD address 'reg'.
 * The data word is written first, then the address with bit 15 set to
 * start the write; the device clears bit 15 when the write completes.
 * Returns 0 on success or ENXIO on timeout.  Currently unused.
 *
 * (Fixed a typo in the KASSERT message: "must by" -> "must be".)
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Bit 15 of the address port clears once the write completes. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
791 
792 #undef PCI_VPD_TIMEOUT
793 
/*
 * Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* 32-bit staging word from VPD port */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* byte offset of next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes consumed */
};
802 
803 static int
804 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
805 {
806 	uint32_t reg;
807 	uint8_t byte;
808 
809 	if (vrs->bytesinval == 0) {
810 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
811 			return (ENXIO);
812 		vrs->val = le32toh(reg);
813 		vrs->off += 4;
814 		byte = vrs->val & 0xff;
815 		vrs->bytesinval = 3;
816 	} else {
817 		vrs->val = vrs->val >> 8;
818 		byte = vrs->val & 0xff;
819 		vrs->bytesinval--;
820 	}
821 
822 	vrs->cksum += byte;
823 	*data = byte;
824 	return (0);
825 }
826 
827 static void
828 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
829 {
830 	struct vpd_readstate vrs;
831 	int state;
832 	int name;
833 	int remain;
834 	int i;
835 	int alloc, off;		/* alloc/off for RO/W arrays */
836 	int cksumvalid;
837 	int dflen;
838 	uint8_t byte;
839 	uint8_t byte2;
840 
841 	/* init vpd reader */
842 	vrs.bytesinval = 0;
843 	vrs.off = 0;
844 	vrs.pcib = pcib;
845 	vrs.cfg = cfg;
846 	vrs.cksum = 0;
847 
848 	state = 0;
849 	name = remain = i = 0;	/* shut up stupid gcc */
850 	alloc = off = 0;	/* shut up stupid gcc */
851 	dflen = 0;		/* shut up stupid gcc */
852 	cksumvalid = -1;
853 	while (state >= 0) {
854 		if (vpd_nextbyte(&vrs, &byte)) {
855 			state = -2;
856 			break;
857 		}
858 #if 0
859 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
860 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
861 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
862 #endif
863 		switch (state) {
864 		case 0:		/* item name */
865 			if (byte & 0x80) {
866 				if (vpd_nextbyte(&vrs, &byte2)) {
867 					state = -2;
868 					break;
869 				}
870 				remain = byte2;
871 				if (vpd_nextbyte(&vrs, &byte2)) {
872 					state = -2;
873 					break;
874 				}
875 				remain |= byte2 << 8;
876 				if (remain > (0x7f*4 - vrs.off)) {
877 					state = -1;
878 					printf(
879 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
880 					    cfg->domain, cfg->bus, cfg->slot,
881 					    cfg->func, remain);
882 				}
883 				name = byte & 0x7f;
884 			} else {
885 				remain = byte & 0x7;
886 				name = (byte >> 3) & 0xf;
887 			}
888 			switch (name) {
889 			case 0x2:	/* String */
890 				cfg->vpd.vpd_ident = malloc(remain + 1,
891 				    M_DEVBUF, M_WAITOK);
892 				i = 0;
893 				state = 1;
894 				break;
895 			case 0xf:	/* End */
896 				state = -1;
897 				break;
898 			case 0x10:	/* VPD-R */
899 				alloc = 8;
900 				off = 0;
901 				cfg->vpd.vpd_ros = malloc(alloc *
902 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
903 				    M_WAITOK | M_ZERO);
904 				state = 2;
905 				break;
906 			case 0x11:	/* VPD-W */
907 				alloc = 8;
908 				off = 0;
909 				cfg->vpd.vpd_w = malloc(alloc *
910 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
911 				    M_WAITOK | M_ZERO);
912 				state = 5;
913 				break;
914 			default:	/* Invalid data, abort */
915 				state = -1;
916 				break;
917 			}
918 			break;
919 
920 		case 1:	/* Identifier String */
921 			cfg->vpd.vpd_ident[i++] = byte;
922 			remain--;
923 			if (remain == 0)  {
924 				cfg->vpd.vpd_ident[i] = '\0';
925 				state = 0;
926 			}
927 			break;
928 
929 		case 2:	/* VPD-R Keyword Header */
930 			if (off == alloc) {
931 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
932 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
933 				    M_DEVBUF, M_WAITOK | M_ZERO);
934 			}
935 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
936 			if (vpd_nextbyte(&vrs, &byte2)) {
937 				state = -2;
938 				break;
939 			}
940 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
941 			if (vpd_nextbyte(&vrs, &byte2)) {
942 				state = -2;
943 				break;
944 			}
945 			dflen = byte2;
946 			if (dflen == 0 &&
947 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
948 			    2) == 0) {
949 				/*
950 				 * if this happens, we can't trust the rest
951 				 * of the VPD.
952 				 */
953 				printf(
954 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
955 				    cfg->domain, cfg->bus, cfg->slot,
956 				    cfg->func, dflen);
957 				cksumvalid = 0;
958 				state = -1;
959 				break;
960 			} else if (dflen == 0) {
961 				cfg->vpd.vpd_ros[off].value = malloc(1 *
962 				    sizeof(*cfg->vpd.vpd_ros[off].value),
963 				    M_DEVBUF, M_WAITOK);
964 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
965 			} else
966 				cfg->vpd.vpd_ros[off].value = malloc(
967 				    (dflen + 1) *
968 				    sizeof(*cfg->vpd.vpd_ros[off].value),
969 				    M_DEVBUF, M_WAITOK);
970 			remain -= 3;
971 			i = 0;
972 			/* keep in sync w/ state 3's transistions */
973 			if (dflen == 0 && remain == 0)
974 				state = 0;
975 			else if (dflen == 0)
976 				state = 2;
977 			else
978 				state = 3;
979 			break;
980 
981 		case 3:	/* VPD-R Keyword Value */
982 			cfg->vpd.vpd_ros[off].value[i++] = byte;
983 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
984 			    "RV", 2) == 0 && cksumvalid == -1) {
985 				if (vrs.cksum == 0)
986 					cksumvalid = 1;
987 				else {
988 					if (bootverbose)
989 						printf(
990 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
991 						    cfg->domain, cfg->bus,
992 						    cfg->slot, cfg->func,
993 						    vrs.cksum);
994 					cksumvalid = 0;
995 					state = -1;
996 					break;
997 				}
998 			}
999 			dflen--;
1000 			remain--;
1001 			/* keep in sync w/ state 2's transistions */
1002 			if (dflen == 0)
1003 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1004 			if (dflen == 0 && remain == 0) {
1005 				cfg->vpd.vpd_rocnt = off;
1006 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1007 				    off * sizeof(*cfg->vpd.vpd_ros),
1008 				    M_DEVBUF, M_WAITOK | M_ZERO);
1009 				state = 0;
1010 			} else if (dflen == 0)
1011 				state = 2;
1012 			break;
1013 
1014 		case 4:
1015 			remain--;
1016 			if (remain == 0)
1017 				state = 0;
1018 			break;
1019 
1020 		case 5:	/* VPD-W Keyword Header */
1021 			if (off == alloc) {
1022 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1023 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1024 				    M_DEVBUF, M_WAITOK | M_ZERO);
1025 			}
1026 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1027 			if (vpd_nextbyte(&vrs, &byte2)) {
1028 				state = -2;
1029 				break;
1030 			}
1031 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1032 			if (vpd_nextbyte(&vrs, &byte2)) {
1033 				state = -2;
1034 				break;
1035 			}
1036 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1037 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1038 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1039 			    sizeof(*cfg->vpd.vpd_w[off].value),
1040 			    M_DEVBUF, M_WAITOK);
1041 			remain -= 3;
1042 			i = 0;
			/* keep in sync w/ state 6's transitions */
1044 			if (dflen == 0 && remain == 0)
1045 				state = 0;
1046 			else if (dflen == 0)
1047 				state = 5;
1048 			else
1049 				state = 6;
1050 			break;
1051 
1052 		case 6:	/* VPD-W Keyword Value */
1053 			cfg->vpd.vpd_w[off].value[i++] = byte;
1054 			dflen--;
1055 			remain--;
			/* keep in sync w/ state 5's transitions */
1057 			if (dflen == 0)
1058 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1059 			if (dflen == 0 && remain == 0) {
1060 				cfg->vpd.vpd_wcnt = off;
1061 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1062 				    off * sizeof(*cfg->vpd.vpd_w),
1063 				    M_DEVBUF, M_WAITOK | M_ZERO);
1064 				state = 0;
1065 			} else if (dflen == 0)
1066 				state = 5;
1067 			break;
1068 
1069 		default:
1070 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1071 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1072 			    state);
1073 			state = -1;
1074 			break;
1075 		}
1076 	}
1077 
1078 	if (cksumvalid == 0 || state < -1) {
1079 		/* read-only data bad, clean up */
1080 		if (cfg->vpd.vpd_ros != NULL) {
1081 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1082 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1083 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1084 			cfg->vpd.vpd_ros = NULL;
1085 		}
1086 	}
1087 	if (state < -1) {
1088 		/* I/O error, clean up */
1089 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1090 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1091 		if (cfg->vpd.vpd_ident != NULL) {
1092 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1093 			cfg->vpd.vpd_ident = NULL;
1094 		}
1095 		if (cfg->vpd.vpd_w != NULL) {
1096 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1097 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1098 			free(cfg->vpd.vpd_w, M_DEVBUF);
1099 			cfg->vpd.vpd_w = NULL;
1100 		}
1101 	}
1102 	cfg->vpd.vpd_cached = 1;
1103 #undef REG
1104 #undef WREG
1105 }
1106 
1107 int
1108 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1109 {
1110 	struct pci_devinfo *dinfo = device_get_ivars(child);
1111 	pcicfgregs *cfg = &dinfo->cfg;
1112 
1113 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1114 		pci_read_vpd(device_get_parent(dev), cfg);
1115 
1116 	*identptr = cfg->vpd.vpd_ident;
1117 
1118 	if (*identptr == NULL)
1119 		return (ENXIO);
1120 
1121 	return (0);
1122 }
1123 
1124 int
1125 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1126 	const char **vptr)
1127 {
1128 	struct pci_devinfo *dinfo = device_get_ivars(child);
1129 	pcicfgregs *cfg = &dinfo->cfg;
1130 	int i;
1131 
1132 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1133 		pci_read_vpd(device_get_parent(dev), cfg);
1134 
1135 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1136 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1137 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1138 			*vptr = cfg->vpd.vpd_ros[i].value;
1139 			return (0);
1140 		}
1141 
1142 	*vptr = NULL;
1143 	return (ENXIO);
1144 }
1145 
1146 /*
1147  * Find the requested extended capability and return the offset in
1148  * configuration space via the pointer provided. The function returns
1149  * 0 on success and error code otherwise.
1150  */
1151 int
1152 pci_find_extcap_method(device_t dev, device_t child, int capability,
1153     int *capreg)
1154 {
1155 	struct pci_devinfo *dinfo = device_get_ivars(child);
1156 	pcicfgregs *cfg = &dinfo->cfg;
1157 	u_int32_t status;
1158 	u_int8_t ptr;
1159 
1160 	/*
1161 	 * Check the CAP_LIST bit of the PCI status register first.
1162 	 */
1163 	status = pci_read_config(child, PCIR_STATUS, 2);
1164 	if (!(status & PCIM_STATUS_CAPPRESENT))
1165 		return (ENXIO);
1166 
1167 	/*
1168 	 * Determine the start pointer of the capabilities list.
1169 	 */
1170 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1171 	case PCIM_HDRTYPE_NORMAL:
1172 	case PCIM_HDRTYPE_BRIDGE:
1173 		ptr = PCIR_CAP_PTR;
1174 		break;
1175 	case PCIM_HDRTYPE_CARDBUS:
1176 		ptr = PCIR_CAP_PTR_2;
1177 		break;
1178 	default:
1179 		/* XXX: panic? */
1180 		return (ENXIO);		/* no extended capabilities support */
1181 	}
1182 	ptr = pci_read_config(child, ptr, 1);
1183 
1184 	/*
1185 	 * Traverse the capabilities list.
1186 	 */
1187 	while (ptr != 0) {
1188 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1189 			if (capreg != NULL)
1190 				*capreg = ptr;
1191 			return (0);
1192 		}
1193 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1194 	}
1195 
1196 	return (ENOENT);
1197 }
1198 
1199 /*
1200  * Support for MSI-X message interrupts.
1201  */
1202 void
1203 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1204 {
1205 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1206 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1207 	uint32_t offset;
1208 
1209 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1210 	offset = msix->msix_table_offset + index * 16;
1211 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1212 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1213 	bus_write_4(msix->msix_table_res, offset + 8, data);
1214 
1215 	/* Enable MSI -> HT mapping. */
1216 	pci_ht_map_msi(dev, address);
1217 }
1218 
1219 void
1220 pci_mask_msix(device_t dev, u_int index)
1221 {
1222 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1223 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1224 	uint32_t offset, val;
1225 
1226 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1227 	offset = msix->msix_table_offset + index * 16 + 12;
1228 	val = bus_read_4(msix->msix_table_res, offset);
1229 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1230 		val |= PCIM_MSIX_VCTRL_MASK;
1231 		bus_write_4(msix->msix_table_res, offset, val);
1232 	}
1233 }
1234 
1235 void
1236 pci_unmask_msix(device_t dev, u_int index)
1237 {
1238 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1239 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1240 	uint32_t offset, val;
1241 
1242 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1243 	offset = msix->msix_table_offset + index * 16 + 12;
1244 	val = bus_read_4(msix->msix_table_res, offset);
1245 	if (val & PCIM_MSIX_VCTRL_MASK) {
1246 		val &= ~PCIM_MSIX_VCTRL_MASK;
1247 		bus_write_4(msix->msix_table_res, offset, val);
1248 	}
1249 }
1250 
1251 int
1252 pci_pending_msix(device_t dev, u_int index)
1253 {
1254 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1255 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1256 	uint32_t offset, bit;
1257 
1258 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1259 	offset = msix->msix_pba_offset + (index / 32) * 4;
1260 	bit = 1 << index % 32;
1261 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1262 }
1263 
1264 /*
1265  * Restore MSI-X registers and table during resume.  If MSI-X is
1266  * enabled then walk the virtual table to restore the actual MSI-X
1267  * table.
1268  */
1269 static void
1270 pci_resume_msix(device_t dev)
1271 {
1272 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1273 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1274 	struct msix_table_entry *mte;
1275 	struct msix_vector *mv;
1276 	int i;
1277 
1278 	if (msix->msix_alloc > 0) {
1279 		/* First, mask all vectors. */
1280 		for (i = 0; i < msix->msix_msgnum; i++)
1281 			pci_mask_msix(dev, i);
1282 
1283 		/* Second, program any messages with at least one handler. */
1284 		for (i = 0; i < msix->msix_table_len; i++) {
1285 			mte = &msix->msix_table[i];
1286 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1287 				continue;
1288 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1289 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1290 			pci_unmask_msix(dev, i);
1291 		}
1292 	}
1293 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1294 	    msix->msix_ctrl, 2);
1295 }
1296 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 *
 * Fails with ENXIO if messages are already allocated, MSI is
 * blacklisted, or the BARs backing the MSI-X table/PBA are not mapped
 * and active; with ENODEV if the device lacks the capability or MSI-X
 * is administratively disabled.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' is still the table entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate IRQs one at a time; stop at the first failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'i' messages were successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is a 1-based index into msix_vectors. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1436 
1437 /*
1438  * By default, pci_alloc_msix() will assign the allocated IRQ
1439  * resources consecutively to the first N messages in the MSI-X table.
1440  * However, device drivers may want to use different layouts if they
1441  * either receive fewer messages than they asked for, or they wish to
1442  * populate the MSI-X table sparsely.  This method allows the driver
1443  * to specify what layout it wants.  It must be called after a
1444  * successful pci_alloc_msix() but before any of the associated
1445  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1446  *
1447  * The 'vectors' array contains 'count' message vectors.  The array
1448  * maps directly to the MSI-X table in that index 0 in the array
1449  * specifies the vector for the first message in the MSI-X table, etc.
1450  * The vector value in each array index can either be 0 to indicate
1451  * that no vector should be assigned to a message slot, or it can be a
1452  * number from 1 to N (where N is the count returned from a
 * succcessful call to pci_alloc_msix()) to indicate which message
1454  * vector (IRQ) to be used for the corresponding message.
1455  *
1456  * On successful return, each message with a non-zero vector will have
1457  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1458  * 1.  Additionally, if any of the IRQs allocated via the previous
1459  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1460  * will be freed back to the system automatically.
1461  *
1462  * For example, suppose a driver has a MSI-X table with 6 messages and
1463  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1464  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1465  * C.  After the call to pci_alloc_msix(), the device will be setup to
1466  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1468  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1469  * be freed back to the system.  This device will also have valid
1470  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1471  *
1472  * In any case, the SYS_RES_IRQ rid X will always map to the message
1473  * at MSI-X table index X - 1 and will only be valid if a vector is
1474  * assigned to that table entry.
1475  */
1476 int
1477 pci_remap_msix_method(device_t dev, device_t child, int count,
1478     const u_int *vectors)
1479 {
1480 	struct pci_devinfo *dinfo = device_get_ivars(child);
1481 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1482 	struct resource_list_entry *rle;
1483 	int i, irq, j, *used;
1484 
1485 	/*
1486 	 * Have to have at least one message in the table but the
1487 	 * table can't be bigger than the actual MSI-X table in the
1488 	 * device.
1489 	 */
1490 	if (count == 0 || count > msix->msix_msgnum)
1491 		return (EINVAL);
1492 
1493 	/* Sanity check the vectors. */
1494 	for (i = 0; i < count; i++)
1495 		if (vectors[i] > msix->msix_alloc)
1496 			return (EINVAL);
1497 
1498 	/*
1499 	 * Make sure there aren't any holes in the vectors to be used.
1500 	 * It's a big pain to support it, and it doesn't really make
1501 	 * sense anyway.  Also, at least one vector must be used.
1502 	 */
1503 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1504 	    M_ZERO);
1505 	for (i = 0; i < count; i++)
1506 		if (vectors[i] != 0)
1507 			used[vectors[i] - 1] = 1;
1508 	for (i = 0; i < msix->msix_alloc - 1; i++)
1509 		if (used[i] == 0 && used[i + 1] == 1) {
1510 			free(used, M_DEVBUF);
1511 			return (EINVAL);
1512 		}
1513 	if (used[0] != 1) {
1514 		free(used, M_DEVBUF);
1515 		return (EINVAL);
1516 	}
1517 
1518 	/* Make sure none of the resources are allocated. */
1519 	for (i = 0; i < msix->msix_table_len; i++) {
1520 		if (msix->msix_table[i].mte_vector == 0)
1521 			continue;
1522 		if (msix->msix_table[i].mte_handlers > 0)
1523 			return (EBUSY);
1524 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1525 		KASSERT(rle != NULL, ("missing resource"));
1526 		if (rle->res != NULL)
1527 			return (EBUSY);
1528 	}
1529 
1530 	/* Free the existing resource list entries. */
1531 	for (i = 0; i < msix->msix_table_len; i++) {
1532 		if (msix->msix_table[i].mte_vector == 0)
1533 			continue;
1534 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1535 	}
1536 
1537 	/*
1538 	 * Build the new virtual table keeping track of which vectors are
1539 	 * used.
1540 	 */
1541 	free(msix->msix_table, M_DEVBUF);
1542 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1543 	    M_DEVBUF, M_WAITOK | M_ZERO);
1544 	for (i = 0; i < count; i++)
1545 		msix->msix_table[i].mte_vector = vectors[i];
1546 	msix->msix_table_len = count;
1547 
1548 	/* Free any unused IRQs and resize the vectors array if necessary. */
1549 	j = msix->msix_alloc - 1;
1550 	if (used[j] == 0) {
1551 		struct msix_vector *vec;
1552 
1553 		while (used[j] == 0) {
1554 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1555 			    msix->msix_vectors[j].mv_irq);
1556 			j--;
1557 		}
1558 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1559 		    M_WAITOK);
1560 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1561 		    (j + 1));
1562 		free(msix->msix_vectors, M_DEVBUF);
1563 		msix->msix_vectors = vec;
1564 		msix->msix_alloc = j + 1;
1565 	}
1566 	free(used, M_DEVBUF);
1567 
1568 	/* Map the IRQs onto the rids. */
1569 	for (i = 0; i < count; i++) {
1570 		if (vectors[i] == 0)
1571 			continue;
1572 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1573 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1574 		    irq, 1);
1575 	}
1576 
1577 	if (bootverbose) {
1578 		device_printf(child, "Remapped MSI-X IRQs as: ");
1579 		for (i = 0; i < count; i++) {
1580 			if (i != 0)
1581 				printf(", ");
1582 			if (vectors[i] == 0)
1583 				printf("---");
1584 			else
1585 				printf("%d",
1586 				    msix->msix_vectors[vectors[i]].mv_irq);
1587 		}
1588 		printf("\n");
1589 	}
1590 
1591 	return (0);
1592 }
1593 
1594 static int
1595 pci_release_msix(device_t dev, device_t child)
1596 {
1597 	struct pci_devinfo *dinfo = device_get_ivars(child);
1598 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1599 	struct resource_list_entry *rle;
1600 	int i;
1601 
1602 	/* Do we have any messages to release? */
1603 	if (msix->msix_alloc == 0)
1604 		return (ENODEV);
1605 
1606 	/* Make sure none of the resources are allocated. */
1607 	for (i = 0; i < msix->msix_table_len; i++) {
1608 		if (msix->msix_table[i].mte_vector == 0)
1609 			continue;
1610 		if (msix->msix_table[i].mte_handlers > 0)
1611 			return (EBUSY);
1612 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1613 		KASSERT(rle != NULL, ("missing resource"));
1614 		if (rle->res != NULL)
1615 			return (EBUSY);
1616 	}
1617 
1618 	/* Update control register to disable MSI-X. */
1619 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1620 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1621 	    msix->msix_ctrl, 2);
1622 
1623 	/* Free the resource list entries. */
1624 	for (i = 0; i < msix->msix_table_len; i++) {
1625 		if (msix->msix_table[i].mte_vector == 0)
1626 			continue;
1627 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1628 	}
1629 	free(msix->msix_table, M_DEVBUF);
1630 	msix->msix_table_len = 0;
1631 
1632 	/* Release the IRQs. */
1633 	for (i = 0; i < msix->msix_alloc; i++)
1634 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1635 		    msix->msix_vectors[i].mv_irq);
1636 	free(msix->msix_vectors, M_DEVBUF);
1637 	msix->msix_alloc = 0;
1638 	return (0);
1639 }
1640 
1641 /*
1642  * Return the max supported MSI-X messages this device supports.
1643  * Basically, assuming the MD code can alloc messages, this function
1644  * should return the maximum value that pci_alloc_msix() can return.
1645  * Thus, it is subject to the tunables, etc.
1646  */
1647 int
1648 pci_msix_count_method(device_t dev, device_t child)
1649 {
1650 	struct pci_devinfo *dinfo = device_get_ivars(child);
1651 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1652 
1653 	if (pci_do_msix && msix->msix_location != 0)
1654 		return (msix->msix_msgnum);
1655 	return (0);
1656 }
1657 
1658 /*
1659  * HyperTransport MSI mapping control
1660  */
1661 void
1662 pci_ht_map_msi(device_t dev, uint64_t addr)
1663 {
1664 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1665 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1666 
1667 	if (!ht->ht_msimap)
1668 		return;
1669 
1670 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1671 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1672 		/* Enable MSI -> HT mapping. */
1673 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1674 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1675 		    ht->ht_msictrl, 2);
1676 	}
1677 
1678 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1679 		/* Disable MSI -> HT mapping. */
1680 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1681 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1682 		    ht->ht_msictrl, 2);
1683 	}
1684 }
1685 
1686 int
1687 pci_get_max_read_req(device_t dev)
1688 {
1689 	int cap;
1690 	uint16_t val;
1691 
1692 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1693 		return (0);
1694 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1695 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1696 	val >>= 12;
1697 	return (1 << (val + 7));
1698 }
1699 
1700 int
1701 pci_set_max_read_req(device_t dev, int size)
1702 {
1703 	int cap;
1704 	uint16_t val;
1705 
1706 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1707 		return (0);
1708 	if (size < 128)
1709 		size = 128;
1710 	if (size > 4096)
1711 		size = 4096;
1712 	size = (1 << (fls(size) - 1));
1713 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1714 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1715 	val |= (fls(size) - 8) << 12;
1716 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1717 	return (size);
1718 }
1719 
1720 /*
1721  * Support for MSI message signalled interrupts.
1722  */
1723 void
1724 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1725 {
1726 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1727 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1728 
1729 	/* Write data and address values. */
1730 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1731 	    address & 0xffffffff, 4);
1732 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1733 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1734 		    address >> 32, 4);
1735 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1736 		    data, 2);
1737 	} else
1738 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1739 		    2);
1740 
1741 	/* Enable MSI in the control register. */
1742 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1743 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1744 	    2);
1745 
1746 	/* Enable MSI -> HT mapping. */
1747 	pci_ht_map_msi(dev, address);
1748 }
1749 
1750 void
1751 pci_disable_msi(device_t dev)
1752 {
1753 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1754 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1755 
1756 	/* Disable MSI -> HT mapping. */
1757 	pci_ht_map_msi(dev, 0);
1758 
1759 	/* Disable MSI in the control register. */
1760 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1761 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1762 	    2);
1763 }
1764 
1765 /*
1766  * Restore MSI registers during resume.  If MSI is enabled then
1767  * restore the data and address registers in addition to the control
1768  * register.
1769  */
1770 static void
1771 pci_resume_msi(device_t dev)
1772 {
1773 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1774 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1775 	uint64_t address;
1776 	uint16_t data;
1777 
1778 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1779 		address = msi->msi_addr;
1780 		data = msi->msi_data;
1781 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1782 		    address & 0xffffffff, 4);
1783 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1784 			pci_write_config(dev, msi->msi_location +
1785 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1786 			pci_write_config(dev, msi->msi_location +
1787 			    PCIR_MSI_DATA_64BIT, data, 2);
1788 		} else
1789 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1790 			    data, 2);
1791 	}
1792 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1793 	    2);
1794 }
1795 
/*
 * Reprogram the MSI or MSI-X message currently routed to 'irq' after
 * the interrupt's mapping has changed: request the updated
 * address/data pair from the parent bridge and rewrite the device's
 * message registers.  Returns ENOENT if 'irq' is not one of the
 * device's allocated message IRQs.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* MSI is disabled while it is reprogrammed. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Rewrite every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while the entry is rewritten. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned even when an MSI-X
		 * vector was found and successfully reprogrammed above;
		 * confirm callers treat this return as best-effort.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
1868 
1869 /*
1870  * Returns true if the specified device is blacklisted because MSI
1871  * doesn't work.
1872  */
1873 int
1874 pci_msi_device_blacklisted(device_t dev)
1875 {
1876 	struct pci_quirk *q;
1877 
1878 	if (!pci_honor_msi_blacklist)
1879 		return (0);
1880 
1881 	for (q = &pci_quirks[0]; q->devid; q++) {
1882 		if (q->devid == pci_get_devid(dev) &&
1883 		    q->type == PCI_QUIRK_DISABLE_MSI)
1884 			return (1);
1885 	}
1886 	return (0);
1887 }
1888 
1889 /*
1890  * Returns true if a specified chipset supports MSI when it is
1891  * emulated hardware in a virtual machine.
1892  */
1893 static int
1894 pci_msi_vm_chipset(device_t dev)
1895 {
1896 	struct pci_quirk *q;
1897 
1898 	for (q = &pci_quirks[0]; q->devid; q++) {
1899 		if (q->devid == pci_get_devid(dev) &&
1900 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1901 			return (1);
1902 	}
1903 	return (0);
1904 }
1905 
1906 /*
1907  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1908  * we just check for blacklisted chipsets as represented by the
1909  * host-PCI bridge at device 0:0:0.  In the future, it may become
1910  * necessary to check other system attributes, such as the kenv values
1911  * that give the motherboard manufacturer and model number.
1912  */
1913 static int
1914 pci_msi_blacklisted(void)
1915 {
1916 	device_t dev;
1917 
1918 	if (!pci_honor_msi_blacklist)
1919 		return (0);
1920 
1921 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1922 	if (!(pcie_chipset || pcix_chipset)) {
1923 		if (vm_guest != VM_GUEST_NO) {
1924 			dev = pci_find_bsf(0, 0, 0);
1925 			if (dev != NULL)
1926 				return (pci_msi_vm_chipset(dev) == 0);
1927 		}
1928 		return (1);
1929 	}
1930 
1931 	dev = pci_find_bsf(0, 0, 0);
1932 	if (dev != NULL)
1933 		return (pci_msi_device_blacklisted(dev));
1934 	return (0);
1935 }
1936 
1937 /*
1938  * Attempt to allocate *count MSI messages.  The actual number allocated is
1939  * returned in *count.  After this function returns, each message will be
1940  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1941  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2.  Halving keeps the count a power of 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field encodes log2(count) starting at bit 4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2060 
2061 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated; any other value (success or a
	 * real error) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so we can hand them back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2109 
2110 /*
2111  * Return the max supported MSI messages this device supports.
2112  * Basically, assuming the MD code can alloc messages, this function
2113  * should return the maximum value that pci_alloc_msi() can return.
2114  * Thus, it is subject to the tunables, etc.
2115  */
2116 int
2117 pci_msi_count_method(device_t dev, device_t child)
2118 {
2119 	struct pci_devinfo *dinfo = device_get_ivars(child);
2120 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2121 
2122 	if (pci_do_msi && msi->msi_location != 0)
2123 		return (msi->msi_msgnum);
2124 	return (0);
2125 }
2126 
/* Free the pcicfgregs structure and all dependent data structures. */
2128 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free VPD data only if a VPD capability register was found. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records (safe variant: entries are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2160 
2161 /*
2162  * PCI power manangement
2163  */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int result, oldstate, highest, delay;

	/* No PCI power-management capability: nothing to program. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the non-state bits of the status register. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	result = 0;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; the capability advertises it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is likewise optional. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2236 
2237 int
2238 pci_get_powerstate_method(device_t dev, device_t child)
2239 {
2240 	struct pci_devinfo *dinfo = device_get_ivars(child);
2241 	pcicfgregs *cfg = &dinfo->cfg;
2242 	uint16_t status;
2243 	int result;
2244 
2245 	if (cfg->pp.pp_cap != 0) {
2246 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2247 		switch (status & PCIM_PSTAT_DMASK) {
2248 		case PCIM_PSTAT_D0:
2249 			result = PCI_POWERSTATE_D0;
2250 			break;
2251 		case PCIM_PSTAT_D1:
2252 			result = PCI_POWERSTATE_D1;
2253 			break;
2254 		case PCIM_PSTAT_D2:
2255 			result = PCI_POWERSTATE_D2;
2256 			break;
2257 		case PCIM_PSTAT_D3:
2258 			result = PCI_POWERSTATE_D3;
2259 			break;
2260 		default:
2261 			result = PCI_POWERSTATE_UNKNOWN;
2262 			break;
2263 		}
2264 	} else {
2265 		/* No support, device is always at D0 */
2266 		result = PCI_POWERSTATE_D0;
2267 	}
2268 	return (result);
2269 }
2270 
2271 /*
2272  * Some convenience functions for PCI device drivers.
2273  */
2274 
2275 static __inline void
2276 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2277 {
2278 	uint16_t	command;
2279 
2280 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2281 	command |= bit;
2282 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2283 }
2284 
2285 static __inline void
2286 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2287 {
2288 	uint16_t	command;
2289 
2290 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2291 	command &= ~bit;
2292 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2293 }
2294 
/* Enable PCI bus mastering for the child device. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2301 
/* Disable PCI bus mastering for the child device. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2308 
2309 int
2310 pci_enable_io_method(device_t dev, device_t child, int space)
2311 {
2312 	uint16_t bit;
2313 
2314 	switch(space) {
2315 	case SYS_RES_IOPORT:
2316 		bit = PCIM_CMD_PORTEN;
2317 		break;
2318 	case SYS_RES_MEMORY:
2319 		bit = PCIM_CMD_MEMEN;
2320 		break;
2321 	default:
2322 		return (EINVAL);
2323 	}
2324 	pci_set_command_bit(dev, child, bit);
2325 	return (0);
2326 }
2327 
2328 int
2329 pci_disable_io_method(device_t dev, device_t child, int space)
2330 {
2331 	uint16_t bit;
2332 
2333 	switch(space) {
2334 	case SYS_RES_IOPORT:
2335 		bit = PCIM_CMD_PORTEN;
2336 		break;
2337 	case SYS_RES_MEMORY:
2338 		bit = PCIM_CMD_MEMEN;
2339 		break;
2340 	default:
2341 		return (EINVAL);
2342 	}
2343 	pci_clear_command_bit(dev, child, bit);
2344 	return (0);
2345 }
2346 
2347 /*
2348  * New style pci driver.  Parent device is either a pci-host-bridge or a
2349  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2350  */
2351 
/* Dump a device's config-space summary when booting verbose. */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Only print interrupt info if the device uses an intpin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power-management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2408 
2409 static int
2410 pci_porten(device_t dev)
2411 {
2412 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2413 }
2414 
2415 static int
2416 pci_memen(device_t dev)
2417 {
2418 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2419 }
2420 
/*
 * Read a BAR's current value and size it by writing all 1's, returning
 * both the original value (*mapp) and the sizing readback (*testvalp).
 * The BAR is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR's upper half lives in the next register. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2484 
/*
 * Program a BAR with a new base address and refresh the cached value
 * in 'pm' from the hardware readback.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects what the hardware actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2505 
2506 struct pci_map *
2507 pci_find_bar(device_t dev, int reg)
2508 {
2509 	struct pci_devinfo *dinfo;
2510 	struct pci_map *pm;
2511 
2512 	dinfo = device_get_ivars(dev);
2513 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2514 		if (pm->pm_reg == reg)
2515 			return (pm);
2516 	}
2517 	return (NULL);
2518 }
2519 
2520 int
2521 pci_bar_enabled(device_t dev, struct pci_map *pm)
2522 {
2523 	struct pci_devinfo *dinfo;
2524 	uint16_t cmd;
2525 
2526 	dinfo = device_get_ivars(dev);
2527 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2528 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2529 		return (0);
2530 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2531 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2532 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2533 	else
2534 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2535 }
2536 
/*
 * Record a newly discovered BAR, keeping the per-device map list
 * sorted by config register offset.  Returns the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty; append at the tail. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2561 
2562 static void
2563 pci_restore_bars(device_t dev)
2564 {
2565 	struct pci_devinfo *dinfo;
2566 	struct pci_map *pm;
2567 	int ln2range;
2568 
2569 	dinfo = device_get_ivars(dev);
2570 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2571 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2572 			ln2range = 32;
2573 		else
2574 			ln2range = pci_maprange(pm->pm_value);
2575 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2576 		if (ln2range == 64)
2577 			pci_write_config(dev, pm->pm_reg + 4,
2578 			    pm->pm_value >> 32, 4);
2579 	}
2580 }
2581 
2582 /*
2583  * Add a resource based on a pci map register. Return 1 if the map
2584  * register is a 32bit map register or 2 if it is a 64bit register.
2585  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Size the BAR and classify it as memory or I/O port space. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);	/* log2 of the BAR size */
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Reject a base that does not fit in the bus address width. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	/* An unprogrammed BAR leaves placement to the parent bus. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2730 
2731 /*
2732  * For ATA devices we need to decide early what addressing mode to use.
2733  * Legacy demands that the primary and secondary ATA ports sits on the
2734  * same addresses that old ISA hardware did. This dictates that we use
2735  * those addresses and ignore the BAR's if we cannot set PCI native
2736  * addressing mode.
2737  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: native mode uses BARs 0/1, else legacy ISA ports. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy primary: command block 0x1f0-0x1f7, control 0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	/* Secondary channel: native mode uses BARs 2/3, else legacy ports. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Legacy secondary: command block 0x170-0x177, control 0x376. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BARs 4 and 5 (bus-master DMA, etc.) are mapped normally. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2792 
/*
 * Determine the IRQ for a device's INTx pin (tunable override first,
 * then bus routing or the intline register) and record it as the rid 0
 * SYS_RES_IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the valid IRQ range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2840 
2841 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the OHCI operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means the BIOS/SMM still owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for the BIOS to relinquish ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* If the BIOS never responded, force a controller reset. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2877 
2878 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Zero the interrupt-enable register, then release the BAR. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2901 
2902 /* Perform early EHCI takeover from SMM. */
2903 static void
2904 ehci_early_takeover(device_t self)
2905 {
2906 	struct resource *res;
2907 	uint32_t cparams;
2908 	uint32_t eec;
2909 	uint8_t eecp;
2910 	uint8_t bios_sem;
2911 	uint8_t offs;
2912 	int rid;
2913 	int i;
2914 
2915 	rid = PCIR_BAR(0);
2916 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2917 	if (res == NULL)
2918 		return;
2919 
2920 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2921 
2922 	/* Synchronise with the BIOS if it owns the controller. */
2923 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2924 	    eecp = EHCI_EECP_NEXT(eec)) {
2925 		eec = pci_read_config(self, eecp, 4);
2926 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2927 			continue;
2928 		}
2929 		bios_sem = pci_read_config(self, eecp +
2930 		    EHCI_LEGSUP_BIOS_SEM, 1);
2931 		if (bios_sem == 0) {
2932 			continue;
2933 		}
2934 		if (bootverbose)
2935 			printf("ehci early: "
2936 			    "SMM active, request owner change\n");
2937 
2938 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2939 
2940 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2941 			DELAY(1000);
2942 			bios_sem = pci_read_config(self, eecp +
2943 			    EHCI_LEGSUP_BIOS_SEM, 1);
2944 		}
2945 
2946 		if (bios_sem != 0) {
2947 			if (bootverbose)
2948 				printf("ehci early: "
2949 				    "SMM does not respond\n");
2950 		}
2951 		/* Disable interrupts */
2952 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2953 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2954 	}
2955 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2956 }
2957 
2958 /* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the XHCI capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		/* Walk the extended-capability list in MMIO space. */
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim the OS ownership semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3019 
3020 void
3021 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
3022 {
3023 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3024 	pcicfgregs *cfg = &dinfo->cfg;
3025 	struct resource_list *rl = &dinfo->resources;
3026 	struct pci_quirk *q;
3027 	int i;
3028 
3029 	/* ATA devices needs special map treatment */
3030 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3031 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3032 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3033 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3034 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3035 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
3036 	else
3037 		for (i = 0; i < cfg->nummaps;)
3038 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
3039 			    prefetchmask & (1 << i));
3040 
3041 	/*
3042 	 * Add additional, quirked resources.
3043 	 */
3044 	for (q = &pci_quirks[0]; q->devid; q++) {
3045 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3046 		    && q->type == PCI_QUIRK_MAP_REG)
3047 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
3048 	}
3049 
3050 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3051 #ifdef __PCI_REROUTE_INTERRUPT
3052 		/*
3053 		 * Try to re-route interrupts. Sometimes the BIOS or
3054 		 * firmware may leave bogus values in these registers.
3055 		 * If the re-route fails, then just stick with what we
3056 		 * have.
3057 		 */
3058 		pci_assign_interrupt(bus, dev, 1);
3059 #else
3060 		pci_assign_interrupt(bus, dev, 0);
3061 #endif
3062 	}
3063 
3064 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3065 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3066 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3067 			xhci_early_takeover(dev);
3068 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3069 			ehci_early_takeover(dev);
3070 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3071 			ohci_early_takeover(dev);
3072 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3073 			uhci_early_takeover(dev);
3074 	}
3075 }
3076 
3077 void
3078 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3079 {
3080 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3081 	device_t pcib = device_get_parent(dev);
3082 	struct pci_devinfo *dinfo;
3083 	int maxslots;
3084 	int s, f, pcifunchigh;
3085 	uint8_t hdrtype;
3086 
3087 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3088 	    ("dinfo_size too small"));
3089 	maxslots = PCIB_MAXSLOTS(pcib);
3090 	for (s = 0; s <= maxslots; s++) {
3091 		pcifunchigh = 0;
3092 		f = 0;
3093 		DELAY(1);
3094 		hdrtype = REG(PCIR_HDRTYPE, 1);
3095 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3096 			continue;
3097 		if (hdrtype & PCIM_MFDEV)
3098 			pcifunchigh = PCI_FUNCMAX;
3099 		for (f = 0; f <= pcifunchigh; f++) {
3100 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3101 			    dinfo_size);
3102 			if (dinfo != NULL) {
3103 				pci_add_child(dev, dinfo);
3104 			}
3105 		}
3106 	}
3107 #undef REG
3108 }
3109 
3110 void
3111 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3112 {
3113 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3114 	device_set_ivars(dinfo->cfg.dev, dinfo);
3115 	resource_list_init(&dinfo->resources);
3116 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3117 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3118 	pci_print_verbose(dinfo);
3119 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3120 }
3121 
/*
 * Probe method for the generic PCI bus driver: always matches, but at
 * low priority so more specific subclasses can take over.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3131 
/*
 * Attach method: look up this bus's domain and bus number from the parent
 * bridge, enumerate its children and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3151 
3152 static void
3153 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3154     int state)
3155 {
3156 	device_t child, pcib;
3157 	struct pci_devinfo *dinfo;
3158 	int dstate, i;
3159 
3160 	/*
3161 	 * Set the device to the given state.  If the firmware suggests
3162 	 * a different power state, use it instead.  If power management
3163 	 * is not present, the firmware is responsible for managing
3164 	 * device power.  Skip children who aren't attached since they
3165 	 * are handled separately.
3166 	 */
3167 	pcib = device_get_parent(dev);
3168 	for (i = 0; i < numdevs; i++) {
3169 		child = devlist[i];
3170 		dinfo = device_get_ivars(child);
3171 		dstate = state;
3172 		if (device_is_attached(child) &&
3173 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3174 			pci_set_powerstate(child, dstate);
3175 	}
3176 }
3177 
3178 int
3179 pci_suspend(device_t dev)
3180 {
3181 	device_t child, *devlist;
3182 	struct pci_devinfo *dinfo;
3183 	int error, i, numdevs;
3184 
3185 	/*
3186 	 * Save the PCI configuration space for each child and set the
3187 	 * device in the appropriate power state for this sleep state.
3188 	 */
3189 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3190 		return (error);
3191 	for (i = 0; i < numdevs; i++) {
3192 		child = devlist[i];
3193 		dinfo = device_get_ivars(child);
3194 		pci_cfg_save(child, dinfo, 0);
3195 	}
3196 
3197 	/* Suspend devices before potentially powering them down. */
3198 	error = bus_generic_suspend(dev);
3199 	if (error) {
3200 		free(devlist, M_TEMP);
3201 		return (error);
3202 	}
3203 	if (pci_do_power_suspend)
3204 		pci_set_power_children(dev, devlist, numdevs,
3205 		    PCI_POWERSTATE_D3);
3206 	free(devlist, M_TEMP);
3207 	return (0);
3208 }
3209 
3210 int
3211 pci_resume(device_t dev)
3212 {
3213 	device_t child, *devlist;
3214 	struct pci_devinfo *dinfo;
3215 	int error, i, numdevs;
3216 
3217 	/*
3218 	 * Set each child to D0 and restore its PCI configuration space.
3219 	 */
3220 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3221 		return (error);
3222 	if (pci_do_power_resume)
3223 		pci_set_power_children(dev, devlist, numdevs,
3224 		    PCI_POWERSTATE_D0);
3225 
3226 	/* Now the device is powered up, restore its config space. */
3227 	for (i = 0; i < numdevs; i++) {
3228 		child = devlist[i];
3229 		dinfo = device_get_ivars(child);
3230 
3231 		pci_cfg_restore(child, dinfo);
3232 		if (!device_is_attached(child))
3233 			pci_cfg_save(child, dinfo, 1);
3234 	}
3235 
3236 	/*
3237 	 * Resume critical devices first, then everything else later.
3238 	 */
3239 	for (i = 0; i < numdevs; i++) {
3240 		child = devlist[i];
3241 		switch (pci_get_class(child)) {
3242 		case PCIC_DISPLAY:
3243 		case PCIC_MEMORY:
3244 		case PCIC_BRIDGE:
3245 		case PCIC_BASEPERIPH:
3246 			DEVICE_RESUME(child);
3247 			break;
3248 		}
3249 	}
3250 	for (i = 0; i < numdevs; i++) {
3251 		child = devlist[i];
3252 		switch (pci_get_class(child)) {
3253 		case PCIC_DISPLAY:
3254 		case PCIC_MEMORY:
3255 		case PCIC_BRIDGE:
3256 		case PCIC_BASEPERIPH:
3257 			break;
3258 		default:
3259 			DEVICE_RESUME(child);
3260 		}
3261 	}
3262 	free(devlist, M_TEMP);
3263 	return (0);
3264 }
3265 
/*
 * Locate the preloaded PCI vendor database (if the loader provided one)
 * and record its address and size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this writes one byte at index
			 * 'size', i.e. just past the reported length —
			 * presumably the preload area has slack for it;
			 * confirm against the loader's allocation.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3285 
3286 void
3287 pci_driver_added(device_t dev, driver_t *driver)
3288 {
3289 	int numdevs;
3290 	device_t *devlist;
3291 	device_t child;
3292 	struct pci_devinfo *dinfo;
3293 	int i;
3294 
3295 	if (bootverbose)
3296 		device_printf(dev, "driver added\n");
3297 	DEVICE_IDENTIFY(driver, dev);
3298 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3299 		return;
3300 	for (i = 0; i < numdevs; i++) {
3301 		child = devlist[i];
3302 		if (device_get_state(child) != DS_NOTPRESENT)
3303 			continue;
3304 		dinfo = device_get_ivars(child);
3305 		pci_print_verbose(dinfo);
3306 		if (bootverbose)
3307 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3308 		pci_cfg_restore(child, dinfo);
3309 		if (device_probe_and_attach(child) != 0)
3310 			pci_cfg_save(child, dinfo, 1);
3311 	}
3312 	free(devlist, M_TEMP);
3313 }
3314 
/*
 * Bus setup_intr method.  After the generic interrupt setup succeeds,
 * program the child's interrupt delivery: rid 0 means legacy INTx (just
 * clear the INTx-disable bit), any other rid is an MSI or MSI-X message
 * that must be mapped through the parent bridge and enabled on first use.
 * On mapping failure the just-installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI address/data on first setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in hardware for the first handler only. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map this vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3406 
3407 int
3408 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3409     void *cookie)
3410 {
3411 	struct msix_table_entry *mte;
3412 	struct resource_list_entry *rle;
3413 	struct pci_devinfo *dinfo;
3414 	int error, rid;
3415 
3416 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3417 		return (EINVAL);
3418 
3419 	/* If this isn't a direct child, just bail out */
3420 	if (device_get_parent(child) != dev)
3421 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3422 
3423 	rid = rman_get_rid(irq);
3424 	if (rid == 0) {
3425 		/* Mask INTx */
3426 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3427 	} else {
3428 		/*
3429 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3430 		 * decrement the appropriate handlers count and mask the
3431 		 * MSI-X message, or disable MSI messages if the count
3432 		 * drops to 0.
3433 		 */
3434 		dinfo = device_get_ivars(child);
3435 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3436 		if (rle->res != irq)
3437 			return (EINVAL);
3438 		if (dinfo->cfg.msi.msi_alloc > 0) {
3439 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3440 			    ("MSI-X index too high"));
3441 			if (dinfo->cfg.msi.msi_handlers == 0)
3442 				return (EINVAL);
3443 			dinfo->cfg.msi.msi_handlers--;
3444 			if (dinfo->cfg.msi.msi_handlers == 0)
3445 				pci_disable_msi(child);
3446 		} else {
3447 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3448 			    ("No MSI or MSI-X interrupts allocated"));
3449 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3450 			    ("MSI-X index too high"));
3451 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3452 			if (mte->mte_handlers == 0)
3453 				return (EINVAL);
3454 			mte->mte_handlers--;
3455 			if (mte->mte_handlers == 0)
3456 				pci_mask_msix(child, rid - 1);
3457 		}
3458 	}
3459 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3460 	if (rid > 0)
3461 		KASSERT(error == 0,
3462 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3463 	return (error);
3464 }
3465 
3466 int
3467 pci_print_child(device_t dev, device_t child)
3468 {
3469 	struct pci_devinfo *dinfo;
3470 	struct resource_list *rl;
3471 	int retval = 0;
3472 
3473 	dinfo = device_get_ivars(child);
3474 	rl = &dinfo->resources;
3475 
3476 	retval += bus_print_child_header(dev, child);
3477 
3478 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3479 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3480 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3481 	if (device_get_flags(dev))
3482 		retval += printf(" flags %#x", device_get_flags(dev));
3483 
3484 	retval += printf(" at device %d.%d", pci_get_slot(child),
3485 	    pci_get_function(child));
3486 
3487 	retval += bus_print_child_footer(dev, child);
3488 
3489 	return (retval);
3490 }
3491 
/*
 * Class/subclass to human-readable description table used by
 * pci_probe_nomatch() when the vendor database has no entry for a
 * device.  A subclass of -1 names the class as a whole; the table is
 * terminated by an entry with a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3583 
3584 void
3585 pci_probe_nomatch(device_t dev, device_t child)
3586 {
3587 	int	i;
3588 	char	*cp, *scp, *device;
3589 
3590 	/*
3591 	 * Look for a listing for this device in a loaded device database.
3592 	 */
3593 	if ((device = pci_describe_device(child)) != NULL) {
3594 		device_printf(dev, "<%s>", device);
3595 		free(device, M_DEVBUF);
3596 	} else {
3597 		/*
3598 		 * Scan the class/subclass descriptions for a general
3599 		 * description.
3600 		 */
3601 		cp = "unknown";
3602 		scp = NULL;
3603 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3604 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3605 				if (pci_nomatch_tab[i].subclass == -1) {
3606 					cp = pci_nomatch_tab[i].desc;
3607 				} else if (pci_nomatch_tab[i].subclass ==
3608 				    pci_get_subclass(child)) {
3609 					scp = pci_nomatch_tab[i].desc;
3610 				}
3611 			}
3612 		}
3613 		device_printf(dev, "<%s%s%s>",
3614 		    cp ? cp : "",
3615 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3616 		    scp ? scp : "");
3617 	}
3618 	printf(" at device %d.%d (no driver attached)\n",
3619 	    pci_get_slot(child), pci_get_function(child));
3620 	pci_cfg_save(child, device_get_ivars(child), 1);
3621 	return;
3622 }
3623 
3624 /*
3625  * Parse the PCI device database, if loaded, and return a pointer to a
3626  * description of the device.
3627  *
3628  * The database is flat text formatted as follows:
3629  *
3630  * Any line not in a valid format is ignored.
3631  * Lines are terminated with newline '\n' characters.
3632  *
3633  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3634  * the vendor name.
3635  *
3636  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3637  * - devices cannot be listed without a corresponding VENDOR line.
3638  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3639  * another TAB, then the device name.
3640  */
3641 
3642 /*
3643  * Assuming (ptr) points to the beginning of a line in the database,
3644  * return the vendor or device and description of the next entry.
3645  * The value of (vendor) or (device) inappropriate for the entry type
3646  * is set to -1.  Returns nonzero at the end of the database.
3647  *
 * Note that this is not entirely robust in the face of corrupt data;
3649  * we attempt to safeguard against this by spamming the end of the
3650  * database with a newline when we initialise.
3651  */
3652 static int
3653 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3654 {
3655 	char	*cp = *ptr;
3656 	int	left;
3657 
3658 	*device = -1;
3659 	*vendor = -1;
3660 	**desc = '\0';
3661 	for (;;) {
3662 		left = pci_vendordata_size - (cp - pci_vendordata);
3663 		if (left <= 0) {
3664 			*ptr = cp;
3665 			return(1);
3666 		}
3667 
3668 		/* vendor entry? */
3669 		if (*cp != '\t' &&
3670 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3671 			break;
3672 		/* device entry? */
3673 		if (*cp == '\t' &&
3674 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3675 			break;
3676 
3677 		/* skip to next line */
3678 		while (*cp != '\n' && left > 0) {
3679 			cp++;
3680 			left--;
3681 		}
3682 		if (*cp == '\n') {
3683 			cp++;
3684 			left--;
3685 		}
3686 	}
3687 	/* skip to next line */
3688 	while (*cp != '\n' && left > 0) {
3689 		cp++;
3690 		left--;
3691 	}
3692 	if (*cp == '\n' && left > 0)
3693 		cp++;
3694 	*ptr = cp;
3695 	return(0);
3696 }
3697 
/*
 * Build a "vendor, device" description string for the given device from
 * the preloaded vendor database.  Returns a malloc(M_DEVBUF)-ed string
 * the caller must free, or NULL if no database is loaded, the vendor is
 * unknown, or allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database without a vendor match: give up. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry, fall back to hex id. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/*
		 * A non -1 vendor means the next vendor's section has
		 * started, so this device is not listed.
		 */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No description found: print the raw device id instead. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3750 
3751 int
3752 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3753 {
3754 	struct pci_devinfo *dinfo;
3755 	pcicfgregs *cfg;
3756 
3757 	dinfo = device_get_ivars(child);
3758 	cfg = &dinfo->cfg;
3759 
3760 	switch (which) {
3761 	case PCI_IVAR_ETHADDR:
3762 		/*
3763 		 * The generic accessor doesn't deal with failure, so
3764 		 * we set the return value, then return an error.
3765 		 */
3766 		*((uint8_t **) result) = NULL;
3767 		return (EINVAL);
3768 	case PCI_IVAR_SUBVENDOR:
3769 		*result = cfg->subvendor;
3770 		break;
3771 	case PCI_IVAR_SUBDEVICE:
3772 		*result = cfg->subdevice;
3773 		break;
3774 	case PCI_IVAR_VENDOR:
3775 		*result = cfg->vendor;
3776 		break;
3777 	case PCI_IVAR_DEVICE:
3778 		*result = cfg->device;
3779 		break;
3780 	case PCI_IVAR_DEVID:
3781 		*result = (cfg->device << 16) | cfg->vendor;
3782 		break;
3783 	case PCI_IVAR_CLASS:
3784 		*result = cfg->baseclass;
3785 		break;
3786 	case PCI_IVAR_SUBCLASS:
3787 		*result = cfg->subclass;
3788 		break;
3789 	case PCI_IVAR_PROGIF:
3790 		*result = cfg->progif;
3791 		break;
3792 	case PCI_IVAR_REVID:
3793 		*result = cfg->revid;
3794 		break;
3795 	case PCI_IVAR_INTPIN:
3796 		*result = cfg->intpin;
3797 		break;
3798 	case PCI_IVAR_IRQ:
3799 		*result = cfg->intline;
3800 		break;
3801 	case PCI_IVAR_DOMAIN:
3802 		*result = cfg->domain;
3803 		break;
3804 	case PCI_IVAR_BUS:
3805 		*result = cfg->bus;
3806 		break;
3807 	case PCI_IVAR_SLOT:
3808 		*result = cfg->slot;
3809 		break;
3810 	case PCI_IVAR_FUNCTION:
3811 		*result = cfg->func;
3812 		break;
3813 	case PCI_IVAR_CMDREG:
3814 		*result = cfg->cmdreg;
3815 		break;
3816 	case PCI_IVAR_CACHELNSZ:
3817 		*result = cfg->cachelnsz;
3818 		break;
3819 	case PCI_IVAR_MINGNT:
3820 		*result = cfg->mingnt;
3821 		break;
3822 	case PCI_IVAR_MAXLAT:
3823 		*result = cfg->maxlat;
3824 		break;
3825 	case PCI_IVAR_LATTIMER:
3826 		*result = cfg->lattimer;
3827 		break;
3828 	default:
3829 		return (ENOENT);
3830 	}
3831 	return (0);
3832 }
3833 
3834 int
3835 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3836 {
3837 	struct pci_devinfo *dinfo;
3838 
3839 	dinfo = device_get_ivars(child);
3840 
3841 	switch (which) {
3842 	case PCI_IVAR_INTPIN:
3843 		dinfo->cfg.intpin = value;
3844 		return (0);
3845 	case PCI_IVAR_ETHADDR:
3846 	case PCI_IVAR_SUBVENDOR:
3847 	case PCI_IVAR_SUBDEVICE:
3848 	case PCI_IVAR_VENDOR:
3849 	case PCI_IVAR_DEVICE:
3850 	case PCI_IVAR_DEVID:
3851 	case PCI_IVAR_CLASS:
3852 	case PCI_IVAR_SUBCLASS:
3853 	case PCI_IVAR_PROGIF:
3854 	case PCI_IVAR_REVID:
3855 	case PCI_IVAR_IRQ:
3856 	case PCI_IVAR_DOMAIN:
3857 	case PCI_IVAR_BUS:
3858 	case PCI_IVAR_SLOT:
3859 	case PCI_IVAR_FUNCTION:
3860 		return (EINVAL);	/* disallow for now */
3861 
3862 	default:
3863 		return (ENOENT);
3864 	}
3865 }
3866 
3867 
3868 #include "opt_ddb.h"
3869 #ifdef DDB
3870 #include <ddb/ddb.h>
3871 #include <sys/cons.h>
3872 
3873 /*
3874  * List resources based on pci map registers, used for within ddb
3875  */
3876 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and print
 * one summary line (selector, class, subsystem/chip ids, revision and
 * header type) per device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 *
	 * NOTE(review): 'error' is initialised to 0 and never changed,
	 * so the (error == 0) loop condition is always true here.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* Unnamed devices print as "none<counter>". */
		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3916 #endif /* DDB */
3917 
/*
 * Lazily reserve backing for a BAR when a child first allocates it.
 *
 * Sizes the BAR (either from a previously-recorded pci_map or by probing
 * the register), validates that the requested resource type matches the
 * BAR type, allocates a suitably sized and aligned resource from the
 * parent, records it in the child's resource list as RLE_RESERVED, and
 * programs the BAR with the assigned address.  Returns the resource or
 * NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 *
	 * NOTE(review): 'count' is u_long while the BAR size is
	 * pci_addr_t; a >4GB BAR would truncate on an ILP32 kernel —
	 * presumably not hit in practice, but worth confirming.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BAR addresses must be naturally aligned to the BAR size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation so later allocations reuse it. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address the allocator picked. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4023 
4024 
/*
 * Allocate a resource for a child of this PCI bus.  Memory and I/O port
 * requests that correspond to a device BAR are lazily reserved here on
 * first use, and legacy INTx interrupts are routed on demand.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren are simply passed up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/*
		 * Reserve resources for this BAR if needed.  On success
		 * pci_reserve_map() adds a resource list entry, so the
		 * resource_list_alloc() below will find it.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4095 
4096 int
4097 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4098     struct resource *r)
4099 {
4100 	struct pci_devinfo *dinfo;
4101 	int error;
4102 
4103 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4104 	if (error)
4105 		return (error);
4106 
4107 	/* Enable decoding in the command register when activating BARs. */
4108 	if (device_get_parent(child) == dev) {
4109 		/* Device ROMs need their decoding explicitly enabled. */
4110 		dinfo = device_get_ivars(child);
4111 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4112 			pci_write_bar(child, pci_find_bar(child, rid),
4113 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4114 		switch (type) {
4115 		case SYS_RES_IOPORT:
4116 		case SYS_RES_MEMORY:
4117 			error = PCI_ENABLE_IO(dev, child, type);
4118 			break;
4119 		}
4120 	}
4121 	return (error);
4122 }
4123 
4124 int
4125 pci_deactivate_resource(device_t dev, device_t child, int type,
4126     int rid, struct resource *r)
4127 {
4128 	struct pci_devinfo *dinfo;
4129 	int error;
4130 
4131 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4132 	if (error)
4133 		return (error);
4134 
4135 	/* Disable decoding for device ROMs. */
4136 	if (device_get_parent(child) == dev) {
4137 		dinfo = device_get_ivars(child);
4138 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4139 			pci_write_bar(child, pci_find_bar(child, rid),
4140 			    rman_get_start(r));
4141 	}
4142 	return (0);
4143 }
4144 
/*
 * Detach and delete a child device, releasing every resource on its
 * resource list.  Decoding is disabled in the command register first so
 * the hardware stops claiming addresses that are about to be freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource that is still active or busy is owned
			 * by the (now detached) child; complain, then
			 * reclaim it so the unreserve below can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			/*
			 * NOTE(review): iterating with STAILQ_FOREACH here
			 * assumes resource_list_unreserve() leaves the entry
			 * linked (entries are freed in bulk below by
			 * resource_list_free()) -- confirm vs. subr_bus.c.
			 */
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4184 
/*
 * Delete one entry from a child's resource list, releasing the
 * underlying reservation if it exists and is not in use.  For BAR
 * resources the BAR register is cleared first so the device stops
 * decoding the range (unless __PCI_BAR_ZERO_VALID says zero is a
 * legitimate BAR value on this platform).
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only operate on our immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		/* Refuse to yank a resource the child is still using. */
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4227 
4228 struct resource_list *
4229 pci_get_resource_list (device_t dev, device_t child)
4230 {
4231 	struct pci_devinfo *dinfo = device_get_ivars(child);
4232 
4233 	return (&dinfo->resources);
4234 }
4235 
4236 uint32_t
4237 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4238 {
4239 	struct pci_devinfo *dinfo = device_get_ivars(child);
4240 	pcicfgregs *cfg = &dinfo->cfg;
4241 
4242 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4243 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4244 }
4245 
4246 void
4247 pci_write_config_method(device_t dev, device_t child, int reg,
4248     uint32_t val, int width)
4249 {
4250 	struct pci_devinfo *dinfo = device_get_ivars(child);
4251 	pcicfgregs *cfg = &dinfo->cfg;
4252 
4253 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4254 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4255 }
4256 
4257 int
4258 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4259     size_t buflen)
4260 {
4261 
4262 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4263 	    pci_get_function(child));
4264 	return (0);
4265 }
4266 
4267 int
4268 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4269     size_t buflen)
4270 {
4271 	struct pci_devinfo *dinfo;
4272 	pcicfgregs *cfg;
4273 
4274 	dinfo = device_get_ivars(child);
4275 	cfg = &dinfo->cfg;
4276 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4277 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4278 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4279 	    cfg->progif);
4280 	return (0);
4281 }
4282 
4283 int
4284 pci_assign_interrupt_method(device_t dev, device_t child)
4285 {
4286 	struct pci_devinfo *dinfo = device_get_ivars(child);
4287 	pcicfgregs *cfg = &dinfo->cfg;
4288 
4289 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4290 	    cfg->intpin));
4291 }
4292 
4293 static int
4294 pci_modevent(module_t mod, int what, void *arg)
4295 {
4296 	static struct cdev *pci_cdev;
4297 
4298 	switch (what) {
4299 	case MOD_LOAD:
4300 		STAILQ_INIT(&pci_devq);
4301 		pci_generation = 0;
4302 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4303 		    "pci");
4304 		pci_load_vendor_data();
4305 		break;
4306 
4307 	case MOD_UNLOAD:
4308 		destroy_dev(pci_cdev);
4309 		break;
4310 	}
4311 
4312 	return (0);
4313 }
4314 
/*
 * Restore a type 0 device's configuration registers (BARs, command,
 * interrupt routing, timers, etc.) from the copy cached in *dinfo,
 * e.g. after resume or a power-state transition.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4356 
4357 void
4358 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4359 {
4360 	uint32_t cls;
4361 	int ps;
4362 
4363 	/*
4364 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4365 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4366 	 * which also require special treatment.  Other types are unknown, and
4367 	 * we err on the side of safety by ignoring them.  Powering down
4368 	 * bridges should not be undertaken lightly.
4369 	 */
4370 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4371 		return;
4372 
4373 	/*
4374 	 * Some drivers apparently write to these registers w/o updating our
4375 	 * cached copy.  No harm happens if we update the copy, so do so here
4376 	 * so we can restore them.  The COMMAND register is modified by the
4377 	 * bus w/o updating the cache.  This should represent the normally
4378 	 * writable portion of the 'defined' part of type 0 headers.  In
4379 	 * theory we also need to save/restore the PCI capability structures
4380 	 * we know about, but apart from power we don't know any that are
4381 	 * writable.
4382 	 */
4383 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4384 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4385 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4386 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4387 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4388 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4389 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4390 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4391 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4392 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4393 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4394 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4395 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4396 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4397 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4398 
4399 	/*
4400 	 * don't set the state for display devices, base peripherals and
4401 	 * memory devices since bad things happen when they are powered down.
4402 	 * We should (a) have drivers that can easily detach and (b) use
4403 	 * generic drivers for these devices so that some device actually
4404 	 * attaches.  We need to make sure that when we implement (a) we don't
4405 	 * power the device down on a reattach.
4406 	 */
4407 	cls = pci_get_class(dev);
4408 	if (!setstate)
4409 		return;
4410 	switch (pci_do_power_nodriver)
4411 	{
4412 		case 0:		/* NO powerdown at all */
4413 			return;
4414 		case 1:		/* Conservative about what to power down */
4415 			if (cls == PCIC_STORAGE)
4416 				return;
4417 			/*FALLTHROUGH*/
4418 		case 2:		/* Agressive about what to power down */
4419 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4420 			    cls == PCIC_BASEPERIPH)
4421 				return;
4422 			/*FALLTHROUGH*/
4423 		case 3:		/* Power down everything */
4424 			break;
4425 	}
4426 	/*
4427 	 * PCI spec says we can only go into D3 state from D0 state.
4428 	 * Transition from D[12] into D0 before going to D3 state.
4429 	 */
4430 	ps = pci_get_powerstate(dev);
4431 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4432 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4433 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4434 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4435 }
4436