xref: /freebsd/sys/dev/pci/pci.c (revision c96ae1968a6ab7056427a739bce81bf07447c2d4)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static int		pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg);
99 #if 0
100 static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static int		pci_msi_blacklisted(void);
105 
/*
 * Newbus method table for the PCI bus driver: standard device and bus
 * interfaces plus the PCI-specific kobj interface from pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminator entry. */
	{ 0, 0 }
};
157 
/* Declare the pci driver class and attach it below pcib instances. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database; see pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
166 
167 
/*
 * Quirk table for devices and host bridges that deviate from the PCI
 * specification; consulted by devid (vendor in the low 16 bits, device
 * in the high 16 bits).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* Terminator entry (devid 0). */
	{ 0 }
};
211 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions (see pci_read_device()). */
struct devlist pci_devq;
uint32_t pci_generation;	/* incremented when a device is added */
uint32_t pci_numdevs = 0;	/* count of entries on pci_devq */

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Read-only: whether to consult pci_msi_blacklisted() before MSI alloc. */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
261 
262 /* Find a device_t by bus/slot/function */
263 
264 device_t
265 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
266 {
267 	struct pci_devinfo *dinfo;
268 
269 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
270 		if ((dinfo->cfg.bus == bus) &&
271 		    (dinfo->cfg.slot == slot) &&
272 		    (dinfo->cfg.func == func)) {
273 			return (dinfo->cfg.dev);
274 		}
275 	}
276 
277 	return (NULL);
278 }
279 
280 /* Find a device_t by vendor/device ID */
281 
282 device_t
283 pci_find_device(uint16_t vendor, uint16_t device)
284 {
285 	struct pci_devinfo *dinfo;
286 
287 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
288 		if ((dinfo->cfg.vendor == vendor) &&
289 		    (dinfo->cfg.device == device)) {
290 			return (dinfo->cfg.dev);
291 		}
292 	}
293 
294 	return (NULL);
295 }
296 
297 /* return base address of memory or port map */
298 
299 static uint32_t
300 pci_mapbase(uint32_t mapreg)
301 {
302 	int mask = 0x03;
303 	if ((mapreg & 0x01) == 0)
304 		mask = 0x0f;
305 	return (mapreg & ~mask);
306 }
307 
308 /* return map type of memory or port map */
309 
310 static int
311 pci_maptype(unsigned mapreg)
312 {
313 	static uint8_t maptype[0x10] = {
314 		PCI_MAPMEM,		PCI_MAPPORT,
315 		PCI_MAPMEM,		0,
316 		PCI_MAPMEM,		PCI_MAPPORT,
317 		0,			0,
318 		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
319 		PCI_MAPMEM|PCI_MAPMEMP, 0,
320 		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
321 		0,			0,
322 	};
323 
324 	return maptype[mapreg & 0x0f];
325 }
326 
327 /* return log2 of map size decoded for memory or port map */
328 
static int
pci_mapsize(uint32_t testval)
{
	uint32_t bits;
	int ln2size;

	/*
	 * Strip the BAR flag bits (the same masking pci_mapbase()
	 * performs: low 2 bits for I/O maps, low 4 for memory maps),
	 * then count trailing zero bits; the position of the lowest
	 * writable bit gives log2 of the decoded size.
	 */
	bits = testval & ~((testval & 0x01) ? 0x03 : 0x0f);
	for (ln2size = 0; bits != 0 && (bits & 1) == 0; ln2size++)
		bits >>= 1;
	return (ln2size);
}
345 
346 /* return log2 of address range supported by map register */
347 
static int
pci_maprange(unsigned mapreg)
{

	/*
	 * Decode the addressable range from the BAR type bits:
	 * 0x2 is a 1MB (below-1MB legacy) memory map, 0x4 a 64-bit
	 * memory map; 0x0 and the I/O encodings (0x1/0x5) are 32-bit.
	 * Reserved encodings decode to 0.
	 */
	switch (mapreg & 0x07) {
	case 0x02:
		return (20);
	case 0x04:
		return (64);
	case 0x00:
	case 0x01:
	case 0x05:
		return (32);
	default:
		return (0);
	}
}
367 
368 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
369 
370 static void
371 pci_fixancient(pcicfgregs *cfg)
372 {
373 	if (cfg->hdrtype != 0)
374 		return;
375 
376 	/* PCI to PCI bridges use header type 1 */
377 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
378 		cfg->hdrtype = 1;
379 }
380 
381 /* extract header type specific config data */
382 
383 static void
384 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
385 {
386 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
387 	switch (cfg->hdrtype) {
388 	case 0:
389 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
390 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
391 		cfg->nummaps	    = PCI_MAXMAPS_0;
392 		break;
393 	case 1:
394 		cfg->nummaps	    = PCI_MAXMAPS_1;
395 		break;
396 	case 2:
397 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
398 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
399 		cfg->nummaps	    = PCI_MAXMAPS_2;
400 		break;
401 	}
402 #undef REG
403 }
404 
405 /* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no function at this B/S/F. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/*
		 * 'size' lets subclassed buses (e.g. cardbus) allocate a
		 * larger containing structure; M_WAITOK means this cannot
		 * fail, so the NULL check below is belt-and-suspenders.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Separate the multi-function bit from the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the data into the pciconf(8)-visible record. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
477 
478 static void
479 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
480 {
481 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
482 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
483 #if defined(__i386__) || defined(__amd64__)
484 	uint64_t addr;
485 #endif
486 	uint32_t val;
487 	int	ptr, nextptr, ptrptr;
488 
489 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
490 	case 0:
491 	case 1:
492 		ptrptr = PCIR_CAP_PTR;
493 		break;
494 	case 2:
495 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
496 		break;
497 	default:
498 		return;		/* no extended capabilities support */
499 	}
500 	nextptr = REG(ptrptr, 1);	/* sanity check? */
501 
502 	/*
503 	 * Read capability entries.
504 	 */
505 	while (nextptr != 0) {
506 		/* Sanity check */
507 		if (nextptr > 255) {
508 			printf("illegal PCI extended capability offset %d\n",
509 			    nextptr);
510 			return;
511 		}
512 		/* Find the next entry */
513 		ptr = nextptr;
514 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
515 
516 		/* Process this entry */
517 		switch (REG(ptr + PCICAP_ID, 1)) {
518 		case PCIY_PMG:		/* PCI power management */
519 			if (cfg->pp.pp_cap == 0) {
520 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
521 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
522 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
523 				if ((nextptr - ptr) > PCIR_POWER_DATA)
524 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
525 			}
526 			break;
527 #if defined(__i386__) || defined(__amd64__)
528 		case PCIY_HT:		/* HyperTransport */
529 			/* Determine HT-specific capability type. */
530 			val = REG(ptr + PCIR_HT_COMMAND, 2);
531 			switch (val & PCIM_HTCMD_CAP_MASK) {
532 			case PCIM_HTCAP_MSI_MAPPING:
533 				/* Sanity check the mapping window. */
534 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
535 				addr <<= 32;
536 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
537 				if (addr != MSI_INTEL_ADDR_BASE)
538 					device_printf(pcib,
539 		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
540 					    cfg->bus, cfg->slot, cfg->func,
541 					    (long long)addr);
542 
543 				/* Enable MSI -> HT mapping. */
544 				val |= PCIM_HTCMD_MSI_ENABLE;
545 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
546 				break;
547 			}
548 			break;
549 #endif
550 		case PCIY_MSI:		/* PCI MSI */
551 			cfg->msi.msi_location = ptr;
552 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
553 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
554 						     PCIM_MSICTRL_MMC_MASK)>>1);
555 			break;
556 		case PCIY_MSIX:		/* PCI MSI-X */
557 			cfg->msix.msix_location = ptr;
558 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
559 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
560 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
561 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
562 			cfg->msix.msix_table_bar = PCIR_BAR(val &
563 			    PCIM_MSIX_BIR_MASK);
564 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
565 			val = REG(ptr + PCIR_MSIX_PBA, 4);
566 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
567 			    PCIM_MSIX_BIR_MASK);
568 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
569 			break;
570 		case PCIY_VPD:		/* PCI Vital Product Data */
571 			cfg->vpd.vpd_reg = ptr;
572 			pci_read_vpd(pcib, cfg);
573 			break;
574 		case PCIY_SUBVENDOR:
575 			/* Should always be true. */
576 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
577 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
578 				cfg->subvendor = val & 0xffff;
579 				cfg->subdevice = val >> 16;
580 			}
581 		default:
582 			break;
583 		}
584 	}
585 /* REG and WREG use carry through to next functions */
586 }
587 
588 /*
589  * PCI Vital Product Data
590  */
591 static uint32_t
592 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
593 {
594 
595 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
596 
597 	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
598 	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
599 		DELAY(1);	/* limit looping */
600 
601 	return REG(cfg->vpd.vpd_reg + 4, 4);
602 }
603 
#if 0
/*
 * Counterpart of pci_read_vpd_reg(): store one dword of VPD data.
 * Currently compiled out (#if 0); kept for reference.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Store the data, then start the write by setting the flag bit. */
	WREG(cfg->vpd.vpd_reg + 4, data, 4);
	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
	/* The flag bit clears when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
618 
/* Cursor/buffer state for the byte-at-a-time VPD reader below. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config cycles */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword fetched from VPD space */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum (mod 256) */
};
627 
628 static uint8_t
629 vpd_nextbyte(struct vpd_readstate *vrs)
630 {
631 	uint8_t byte;
632 
633 	if (vrs->bytesinval == 0) {
634 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
635 		    vrs->off));
636 		vrs->off += 4;
637 		byte = vrs->val & 0xff;
638 		vrs->bytesinval = 3;
639 	} else {
640 		vrs->val = vrs->val >> 8;
641 		byte = vrs->val & 0xff;
642 		vrs->bytesinval--;
643 	}
644 
645 	vrs->cksum += byte;
646 	return byte;
647 }
648 
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array, and the read/write
 * (VPD-W) keyword array.  Implemented as a state machine driven one
 * byte at a time by vpd_nextbyte(); 'state' selects the current parse
 * phase, 'remain' counts bytes left in the current resource, 'dflen'
 * bytes left in the current keyword's value.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;	/* -1 = not yet verified against "RV" */
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 distinguishes large from small resource tags. */
			if (byte & 0x80) {
				/* Large tag: 16-bit little-endian length. */
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small tag: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* keyword bytes + length byte */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte makes the running
			 * checksum of everything so far sum to zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Last keyword: shrink array to final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip state: consume 'remain' bytes unexamined. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			/* Record where this value lives for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/*
		 * read-only data bad, clean up.
		 * NOTE(review): this loop frees indices off..1 but never
		 * index 0, and vpd_ros[off].value may be uninitialized if
		 * the failure hit in the keyword header -- looks leaky;
		 * verify before relying on it.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
#undef REG
#undef WREG
}
881 
882 int
883 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
884 {
885 	struct pci_devinfo *dinfo = device_get_ivars(child);
886 	pcicfgregs *cfg = &dinfo->cfg;
887 
888 	*identptr = cfg->vpd.vpd_ident;
889 
890 	if (*identptr == NULL)
891 		return ENXIO;
892 
893 	return 0;
894 }
895 
896 int
897 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
898 	const char **vptr)
899 {
900 	struct pci_devinfo *dinfo = device_get_ivars(child);
901 	pcicfgregs *cfg = &dinfo->cfg;
902 	int i;
903 
904 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
905 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
906 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
907 			*vptr = cfg->vpd.vpd_ros[i].value;
908 		}
909 
910 	if (i != cfg->vpd.vpd_rocnt)
911 		return 0;
912 
913 	*vptr = NULL;
914 	return ENXIO;
915 }
916 
917 /*
918  * Return the offset in configuration space of the requested extended
919  * capability entry or 0 if the specified capability was not found.
920  */
921 int
922 pci_find_extcap_method(device_t dev, device_t child, int capability,
923     int *capreg)
924 {
925 	struct pci_devinfo *dinfo = device_get_ivars(child);
926 	pcicfgregs *cfg = &dinfo->cfg;
927 	u_int32_t status;
928 	u_int8_t ptr;
929 
930 	/*
931 	 * Check the CAP_LIST bit of the PCI status register first.
932 	 */
933 	status = pci_read_config(child, PCIR_STATUS, 2);
934 	if (!(status & PCIM_STATUS_CAPPRESENT))
935 		return (ENXIO);
936 
937 	/*
938 	 * Determine the start pointer of the capabilities list.
939 	 */
940 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
941 	case 0:
942 	case 1:
943 		ptr = PCIR_CAP_PTR;
944 		break;
945 	case 2:
946 		ptr = PCIR_CAP_PTR_2;
947 		break;
948 	default:
949 		/* XXX: panic? */
950 		return (ENXIO);		/* no extended capabilities support */
951 	}
952 	ptr = pci_read_config(child, ptr, 1);
953 
954 	/*
955 	 * Traverse the capabilities list.
956 	 */
957 	while (ptr != 0) {
958 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
959 			if (capreg != NULL)
960 				*capreg = ptr;
961 			return (0);
962 		}
963 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
964 	}
965 
966 	return (ENOENT);
967 }
968 
969 /*
970  * Support for MSI-X message interrupts.
971  */
972 void
973 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
974 {
975 	struct pci_devinfo *dinfo = device_get_ivars(dev);
976 	pcicfgregs *cfg = &dinfo->cfg;
977 	uint32_t offset;
978 
979 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
980 	offset = cfg->msix.msix_table_offset + index * 16;
981 	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
982 	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
983 	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
984 }
985 
986 void
987 pci_mask_msix(device_t dev, u_int index)
988 {
989 	struct pci_devinfo *dinfo = device_get_ivars(dev);
990 	pcicfgregs *cfg = &dinfo->cfg;
991 	uint32_t offset, val;
992 
993 	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
994 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
995 	val = bus_read_4(cfg->msix.msix_table_res, offset);
996 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
997 		val |= PCIM_MSIX_VCTRL_MASK;
998 		bus_write_4(cfg->msix.msix_table_res, offset, val);
999 	}
1000 }
1001 
1002 void
1003 pci_unmask_msix(device_t dev, u_int index)
1004 {
1005 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1006 	pcicfgregs *cfg = &dinfo->cfg;
1007 	uint32_t offset, val;
1008 
1009 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1010 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1011 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1012 	if (val & PCIM_MSIX_VCTRL_MASK) {
1013 		val &= ~PCIM_MSIX_VCTRL_MASK;
1014 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1015 	}
1016 }
1017 
1018 int
1019 pci_pending_msix(device_t dev, u_int index)
1020 {
1021 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1022 	pcicfgregs *cfg = &dinfo->cfg;
1023 	uint32_t offset, bit;
1024 
1025 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1026 	offset = cfg->msix.msix_pba_offset + (index / 4) * 4;
1027 	bit = 1 << index % 32;
1028 	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1029 }
1030 
1031 /*
1032  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1033  * returned in *count.  After this function returns, each message will be
1034  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1035  */
1036 int
1037 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1038 {
1039 	struct pci_devinfo *dinfo = device_get_ivars(child);
1040 	pcicfgregs *cfg = &dinfo->cfg;
1041 	struct resource_list_entry *rle;
1042 	int actual, error, i, irq, max;
1043 
1044 	/* Don't let count == 0 get us into trouble. */
1045 	if (*count == 0)
1046 		return (EINVAL);
1047 
1048 	/* If rid 0 is allocated, then fail. */
1049 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1050 	if (rle != NULL && rle->res != NULL)
1051 		return (ENXIO);
1052 
1053 	/* Already have allocated messages? */
1054 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1055 		return (ENXIO);
1056 
1057 	/* If MSI is blacklisted for this system, fail. */
1058 	if (pci_msi_blacklisted())
1059 		return (ENXIO);
1060 
1061 	/* MSI-X capability present? */
1062 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1063 		return (ENODEV);
1064 
1065 	/* Make sure the appropriate BARs are mapped. */
1066 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1067 	    cfg->msix.msix_table_bar);
1068 	if (rle == NULL || rle->res == NULL ||
1069 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1070 		return (ENXIO);
1071 	cfg->msix.msix_table_res = rle->res;
1072 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1073 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1074 		    cfg->msix.msix_pba_bar);
1075 		if (rle == NULL || rle->res == NULL ||
1076 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1077 			return (ENXIO);
1078 	}
1079 	cfg->msix.msix_pba_res = rle->res;
1080 
1081 	if (bootverbose)
1082 		device_printf(child,
1083 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1084 		    *count, cfg->msix.msix_msgnum);
1085 	max = min(*count, cfg->msix.msix_msgnum);
1086 	for (i = 0; i < max; i++) {
1087 		/* Allocate a message. */
1088 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1089 		    &irq);
1090 		if (error)
1091 			break;
1092 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1093 		    irq, 1);
1094 	}
1095 	actual = i;
1096 
1097 	if (bootverbose) {
1098 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1099 		if (actual == 1)
1100 			device_printf(child, "using IRQ %lu for MSI-X\n",
1101 			    rle->start);
1102 		else {
1103 			int run;
1104 
1105 			/*
1106 			 * Be fancy and try to print contiguous runs of
1107 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1108 			 * 'run' is true if we are in a range.
1109 			 */
1110 			device_printf(child, "using IRQs %lu", rle->start);
1111 			irq = rle->start;
1112 			run = 0;
1113 			for (i = 1; i < actual; i++) {
1114 				rle = resource_list_find(&dinfo->resources,
1115 				    SYS_RES_IRQ, i + 1);
1116 
1117 				/* Still in a run? */
1118 				if (rle->start == irq + 1) {
1119 					run = 1;
1120 					irq++;
1121 					continue;
1122 				}
1123 
1124 				/* Finish previous range. */
1125 				if (run) {
1126 					printf("-%d", irq);
1127 					run = 0;
1128 				}
1129 
1130 				/* Start new range. */
1131 				printf(",%lu", rle->start);
1132 				irq = rle->start;
1133 			}
1134 
1135 			/* Unfinished range? */
1136 			if (run)
1137 				printf("%d", irq);
1138 			printf(" for MSI-X\n");
1139 		}
1140 	}
1141 
1142 	/* Mask all vectors. */
1143 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1144 		pci_mask_msix(child, i);
1145 
1146 	/* Update control register to enable MSI-X. */
1147 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1148 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1149 	    cfg->msix.msix_ctrl, 2);
1150 
1151 	/* Update counts of alloc'd messages. */
1152 	cfg->msix.msix_alloc = actual;
1153 	*count = actual;
1154 	return (0);
1155 }
1156 
1157 /*
1158  * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1159  * the first N messages in the MSI-X table.  However, device drivers may
1160  * want to use different layouts in the case that they do not allocate a
1161  * full table.  This method allows the driver to specify what layout it
1162  * wants.  It must be called after a successful pci_alloc_msix() but
1163  * before any of the associated SYS_RES_IRQ resources are allocated via
1164  * bus_alloc_resource().  The 'indices' array contains N (where N equals
1165  * the 'count' returned from pci_alloc_msix()) message indices.  The
1166  * indices are 1-based (meaning the first message is at index 1).  On
1167  * successful return, each of the messages in the 'indices' array will
1168  * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1169  * if indices contains { 2, 4 }, then upon successful return, the 'child'
1170  * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1171  */
1172 int
1173 pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
1174 {
1175 	struct pci_devinfo *dinfo = device_get_ivars(child);
1176 	pcicfgregs *cfg = &dinfo->cfg;
1177 	struct resource_list_entry *rle;
1178 	int count, error, i, j, *irqs;
1179 
1180 	/* Sanity check the indices. */
1181 	for (i = 0; i < cfg->msix.msix_alloc; i++)
1182 		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
1183 			return (EINVAL);
1184 
1185 	/* Check for duplicates. */
1186 	for (i = 0; i < cfg->msix.msix_alloc; i++)
1187 		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
1188 			if (indices[i] == indices[j])
1189 				return (EINVAL);
1190 
1191 	/* Make sure none of the resources are allocated. */
1192 	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1193 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1194 		if (rle == NULL)
1195 			continue;
1196 		if (rle->res != NULL)
1197 			return (EBUSY);
1198 		count++;
1199 	}
1200 
1201 	/* Save the IRQ values and free the existing resources. */
1202 	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
1203 	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
1204 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
1205 		if (rle == NULL)
1206 			continue;
1207 		irqs[count] = rle->start;
1208 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
1209 		count++;
1210 	}
1211 
1212 	/* Map the IRQ values to the new message indices and rids. */
1213 	for (i = 0; i < cfg->msix.msix_alloc; i++) {
1214 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
1215 		    irqs[i], irqs[i], 1);
1216 		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
1217 		    indices[i], irqs[i]);
1218 		KASSERT(error == 0, ("Failed to remap MSI-X message"));
1219 	}
1220 	if (bootverbose) {
1221 		if (cfg->msix.msix_alloc == 1)
1222 			device_printf(child,
1223 			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
1224 		else {
1225 			device_printf(child, "Remapped MSI-X IRQs to indices");
1226 			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
1227 				printf(" %d,", indices[i]);
1228 			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
1229 		}
1230 	}
1231 	free(irqs, M_TEMP);
1232 
1233 	return (0);
1234 }
1235 
/*
 * Release all MSI-X messages allocated to 'child' and disable MSI-X in
 * the capability's control register.  Returns ENODEV when no MSI-X
 * messages are allocated and EBUSY when any of the backing SYS_RES_IRQ
 * resources are still held by the driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  Rids may be
	 * sparse (see pci_remap_msix_method), so walk upwards until
	 * msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages back to the parent bridge. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1278 
1279 /*
1280  * Return the max supported MSI-X messages this device supports.
1281  * Basically, assuming the MD code can alloc messages, this function
1282  * should return the maximum value that pci_alloc_msix() can return.
1283  * Thus, it is subject to the tunables, etc.
1284  */
1285 int
1286 pci_msix_count_method(device_t dev, device_t child)
1287 {
1288 	struct pci_devinfo *dinfo = device_get_ivars(child);
1289 	pcicfgregs *cfg = &dinfo->cfg;
1290 
1291 	if (pci_do_msix && cfg->msix.msix_location != 0)
1292 		return (cfg->msix.msix_msgnum);
1293 	return (0);
1294 }
1295 
1296 /*
1297  * Support for MSI message signalled interrupts.
1298  */
/*
 * Program the MSI address/data pair into the device's MSI capability
 * registers and set the MSI enable bit.  The values are also cached in
 * the device's pcicfgregs so pci_resume_msi() can restore them.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	/* Write data and address values. */
	cfg->msi.msi_addr = address;
	cfg->msi.msi_data = data;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* 64-bit capable functions keep the data register at a higher offset. */
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA_64BIT, data, 2);
	} else
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA, data, 2);

	/* Enable MSI in the control register. */
	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1324 
1325 /*
1326  * Restore MSI registers during resume.  If MSI is enabled then
1327  * restore the data and address registers in addition to the control
1328  * register.
1329  */
1330 static void
1331 pci_resume_msi(device_t dev)
1332 {
1333 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1334 	pcicfgregs *cfg = &dinfo->cfg;
1335 	uint64_t address;
1336 	uint16_t data;
1337 
1338 	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1339 		address = cfg->msi.msi_addr;
1340 		data = cfg->msi.msi_data;
1341 		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1342 		    address & 0xffffffff, 4);
1343 		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1344 			pci_write_config(dev, cfg->msi.msi_location +
1345 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1346 			pci_write_config(dev, cfg->msi.msi_location +
1347 			    PCIR_MSI_DATA_64BIT, data, 2);
1348 		} else
1349 			pci_write_config(dev, cfg->msi.msi_location +
1350 			    PCIR_MSI_DATA, data, 2);
1351 	}
1352 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1353 	    cfg->msi.msi_ctrl, 2);
1354 }
1355 
1356 /*
1357  * Returns true if the specified device is blacklisted because MSI
1358  * doesn't work.
1359  */
1360 int
1361 pci_msi_device_blacklisted(device_t dev)
1362 {
1363 	struct pci_quirk *q;
1364 
1365 	if (!pci_honor_msi_blacklist)
1366 		return (0);
1367 
1368 	for (q = &pci_quirks[0]; q->devid; q++) {
1369 		if (q->devid == pci_get_devid(dev) &&
1370 		    q->type == PCI_QUIRK_DISABLE_MSI)
1371 			return (1);
1372 	}
1373 	return (0);
1374 }
1375 
1376 /*
1377  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1378  * we just check for blacklisted chipsets as represented by the
1379  * host-PCI bridge at device 0:0:0.  In the future, it may become
1380  * necessary to check other system attributes, such as the kenv values
1381  * that give the motherboard manufacturer and model number.
1382  */
1383 static int
1384 pci_msi_blacklisted(void)
1385 {
1386 	device_t dev;
1387 
1388 	if (!pci_honor_msi_blacklist)
1389 		return (0);
1390 
1391 	dev = pci_find_bsf(0, 0, 0);
1392 	if (dev != NULL)
1393 		return (pci_msi_device_blacklisted(dev));
1394 	return (0);
1395 }
1396 
1397 /*
1398  * Attempt to allocate *count MSI messages.  The actual number allocated is
1399  * returned in *count.  After this function returns, each message will be
1400  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1401  */
1402 int
1403 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1404 {
1405 	struct pci_devinfo *dinfo = device_get_ivars(child);
1406 	pcicfgregs *cfg = &dinfo->cfg;
1407 	struct resource_list_entry *rle;
1408 	int actual, error, i, irqs[32];
1409 	uint16_t ctrl;
1410 
1411 	/* Don't let count == 0 get us into trouble. */
1412 	if (*count == 0)
1413 		return (EINVAL);
1414 
1415 	/* If rid 0 is allocated, then fail. */
1416 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1417 	if (rle != NULL && rle->res != NULL)
1418 		return (ENXIO);
1419 
1420 	/* Already have allocated messages? */
1421 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1422 		return (ENXIO);
1423 
1424 	/* If MSI is blacklisted for this system, fail. */
1425 	if (pci_msi_blacklisted())
1426 		return (ENXIO);
1427 
1428 	/* MSI capability present? */
1429 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1430 		return (ENODEV);
1431 
1432 	if (bootverbose)
1433 		device_printf(child,
1434 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1435 		    *count, cfg->msi.msi_msgnum);
1436 
1437 	/* Don't ask for more than the device supports. */
1438 	actual = min(*count, cfg->msi.msi_msgnum);
1439 
1440 	/* Don't ask for more than 32 messages. */
1441 	actual = min(actual, 32);
1442 
1443 	/* MSI requires power of 2 number of messages. */
1444 	if (!powerof2(actual))
1445 		return (EINVAL);
1446 
1447 	for (;;) {
1448 		/* Try to allocate N messages. */
1449 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1450 		    cfg->msi.msi_msgnum, irqs);
1451 		if (error == 0)
1452 			break;
1453 		if (actual == 1)
1454 			return (error);
1455 
1456 		/* Try N / 2. */
1457 		actual >>= 1;
1458 	}
1459 
1460 	/*
1461 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1462 	 * resources in the irqs[] array, so add new resources
1463 	 * starting at rid 1.
1464 	 */
1465 	for (i = 0; i < actual; i++)
1466 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1467 		    irqs[i], irqs[i], 1);
1468 
1469 	if (bootverbose) {
1470 		if (actual == 1)
1471 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1472 		else {
1473 			int run;
1474 
1475 			/*
1476 			 * Be fancy and try to print contiguous runs
1477 			 * of IRQ values as ranges.  'run' is true if
1478 			 * we are in a range.
1479 			 */
1480 			device_printf(child, "using IRQs %d", irqs[0]);
1481 			run = 0;
1482 			for (i = 1; i < actual; i++) {
1483 
1484 				/* Still in a run? */
1485 				if (irqs[i] == irqs[i - 1] + 1) {
1486 					run = 1;
1487 					continue;
1488 				}
1489 
1490 				/* Finish previous range. */
1491 				if (run) {
1492 					printf("-%d", irqs[i - 1]);
1493 					run = 0;
1494 				}
1495 
1496 				/* Start new range. */
1497 				printf(",%d", irqs[i]);
1498 			}
1499 
1500 			/* Unfinished range? */
1501 			if (run)
1502 				printf("%d", irqs[actual - 1]);
1503 			printf(" for MSI\n");
1504 		}
1505 	}
1506 
1507 	/* Update control register with actual count and enable MSI. */
1508 	ctrl = cfg->msi.msi_ctrl;
1509 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1510 	ctrl |= (ffs(actual) - 1) << 4;
1511 	cfg->msi.msi_ctrl = ctrl;
1512 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1513 
1514 	/* Update counts of alloc'd messages. */
1515 	cfg->msi.msi_alloc = actual;
1516 	*count = actual;
1517 	return (0);
1518 }
1519 
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated; collect the
	 * IRQ numbers so they can be handed back to the bridge below.
	 */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1563 
1564 /*
1565  * Return the max supported MSI messages this device supports.
1566  * Basically, assuming the MD code can alloc messages, this function
1567  * should return the maximum value that pci_alloc_msi() can return.
1568  * Thus, it is subject to the tunables, etc.
1569  */
1570 int
1571 pci_msi_count_method(device_t dev, device_t child)
1572 {
1573 	struct pci_devinfo *dinfo = device_get_ivars(child);
1574 	pcicfgregs *cfg = &dinfo->cfg;
1575 
1576 	if (pci_do_msi && cfg->msi.msi_location != 0)
1577 		return (cfg->msi.msi_msgnum);
1578 	return (0);
1579 }
1580 
/* free pcicfgregs structure and all depending data structures */

int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Free the VPD identifier string and read-only/writable entries. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
1610 
1611 /*
 * PCI power management
1613  */
1614 int
1615 pci_set_powerstate_method(device_t dev, device_t child, int state)
1616 {
1617 	struct pci_devinfo *dinfo = device_get_ivars(child);
1618 	pcicfgregs *cfg = &dinfo->cfg;
1619 	uint16_t status;
1620 	int result, oldstate, highest, delay;
1621 
1622 	if (cfg->pp.pp_cap == 0)
1623 		return (EOPNOTSUPP);
1624 
1625 	/*
1626 	 * Optimize a no state change request away.  While it would be OK to
1627 	 * write to the hardware in theory, some devices have shown odd
1628 	 * behavior when going from D3 -> D3.
1629 	 */
1630 	oldstate = pci_get_powerstate(child);
1631 	if (oldstate == state)
1632 		return (0);
1633 
1634 	/*
1635 	 * The PCI power management specification states that after a state
1636 	 * transition between PCI power states, system software must
1637 	 * guarantee a minimal delay before the function accesses the device.
1638 	 * Compute the worst case delay that we need to guarantee before we
1639 	 * access the device.  Many devices will be responsive much more
1640 	 * quickly than this delay, but there are some that don't respond
1641 	 * instantly to state changes.  Transitions to/from D3 state require
1642 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1643 	 * is done below with DELAY rather than a sleeper function because
1644 	 * this function can be called from contexts where we cannot sleep.
1645 	 */
1646 	highest = (oldstate > state) ? oldstate : state;
1647 	if (highest == PCI_POWERSTATE_D3)
1648 	    delay = 10000;
1649 	else if (highest == PCI_POWERSTATE_D2)
1650 	    delay = 200;
1651 	else
1652 	    delay = 0;
1653 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1654 	    & ~PCIM_PSTAT_DMASK;
1655 	result = 0;
1656 	switch (state) {
1657 	case PCI_POWERSTATE_D0:
1658 		status |= PCIM_PSTAT_D0;
1659 		break;
1660 	case PCI_POWERSTATE_D1:
1661 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1662 			return (EOPNOTSUPP);
1663 		status |= PCIM_PSTAT_D1;
1664 		break;
1665 	case PCI_POWERSTATE_D2:
1666 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1667 			return (EOPNOTSUPP);
1668 		status |= PCIM_PSTAT_D2;
1669 		break;
1670 	case PCI_POWERSTATE_D3:
1671 		status |= PCIM_PSTAT_D3;
1672 		break;
1673 	default:
1674 		return (EINVAL);
1675 	}
1676 
1677 	if (bootverbose)
1678 		printf(
1679 		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1680 		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1681 		    oldstate, state);
1682 
1683 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1684 	if (delay)
1685 		DELAY(delay);
1686 	return (0);
1687 }
1688 
1689 int
1690 pci_get_powerstate_method(device_t dev, device_t child)
1691 {
1692 	struct pci_devinfo *dinfo = device_get_ivars(child);
1693 	pcicfgregs *cfg = &dinfo->cfg;
1694 	uint16_t status;
1695 	int result;
1696 
1697 	if (cfg->pp.pp_cap != 0) {
1698 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1699 		switch (status & PCIM_PSTAT_DMASK) {
1700 		case PCIM_PSTAT_D0:
1701 			result = PCI_POWERSTATE_D0;
1702 			break;
1703 		case PCIM_PSTAT_D1:
1704 			result = PCI_POWERSTATE_D1;
1705 			break;
1706 		case PCIM_PSTAT_D2:
1707 			result = PCI_POWERSTATE_D2;
1708 			break;
1709 		case PCIM_PSTAT_D3:
1710 			result = PCI_POWERSTATE_D3;
1711 			break;
1712 		default:
1713 			result = PCI_POWERSTATE_UNKNOWN;
1714 			break;
1715 		}
1716 	} else {
1717 		/* No support, device is always at D0 */
1718 		result = PCI_POWERSTATE_D0;
1719 	}
1720 	return (result);
1721 }
1722 
1723 /*
1724  * Some convenience functions for PCI device drivers.
1725  */
1726 
1727 static __inline void
1728 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1729 {
1730 	uint16_t	command;
1731 
1732 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1733 	command |= bit;
1734 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1735 }
1736 
1737 static __inline void
1738 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1739 {
1740 	uint16_t	command;
1741 
1742 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1743 	command &= ~bit;
1744 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1745 }
1746 
1747 int
1748 pci_enable_busmaster_method(device_t dev, device_t child)
1749 {
1750 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1751 	return (0);
1752 }
1753 
1754 int
1755 pci_disable_busmaster_method(device_t dev, device_t child)
1756 {
1757 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
1758 	return (0);
1759 }
1760 
1761 int
1762 pci_enable_io_method(device_t dev, device_t child, int space)
1763 {
1764 	uint16_t command;
1765 	uint16_t bit;
1766 	char *error;
1767 
1768 	bit = 0;
1769 	error = NULL;
1770 
1771 	switch(space) {
1772 	case SYS_RES_IOPORT:
1773 		bit = PCIM_CMD_PORTEN;
1774 		error = "port";
1775 		break;
1776 	case SYS_RES_MEMORY:
1777 		bit = PCIM_CMD_MEMEN;
1778 		error = "memory";
1779 		break;
1780 	default:
1781 		return (EINVAL);
1782 	}
1783 	pci_set_command_bit(dev, child, bit);
1784 	/* Some devices seem to need a brief stall here, what do to? */
1785 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1786 	if (command & bit)
1787 		return (0);
1788 	device_printf(child, "failed to enable %s mapping!\n", error);
1789 	return (ENXIO);
1790 }
1791 
1792 int
1793 pci_disable_io_method(device_t dev, device_t child, int space)
1794 {
1795 	uint16_t command;
1796 	uint16_t bit;
1797 	char *error;
1798 
1799 	bit = 0;
1800 	error = NULL;
1801 
1802 	switch(space) {
1803 	case SYS_RES_IOPORT:
1804 		bit = PCIM_CMD_PORTEN;
1805 		error = "port";
1806 		break;
1807 	case SYS_RES_MEMORY:
1808 		bit = PCIM_CMD_MEMEN;
1809 		error = "memory";
1810 		break;
1811 	default:
1812 		return (EINVAL);
1813 	}
1814 	pci_clear_command_bit(dev, child, bit);
1815 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1816 	if (command & bit) {
1817 		device_printf(child, "failed to disable %s mapping!\n", error);
1818 		return (ENXIO);
1819 	}
1820 	return (0);
1821 }
1822 
1823 /*
1824  * New style pci driver.  Parent device is either a pci-host-bridge or a
1825  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1826  */
1827 
/*
 * Dump the standard config header fields plus power-management, VPD,
 * MSI, and MSI-X capability details for a device; only active when the
 * system was booted verbose (bootverbose).
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{
	int i;

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management: supported and current D-states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* Vital Product Data: identifier, read-only, and writable keywords. */
		if (cfg->vpd.vpd_reg) {
			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
				struct vpd_readonly *vrop;
				vrop = &cfg->vpd.vpd_ros[i];
				if (strncmp("CP", vrop->keyword, 2) == 0)
					printf("\tCP: id %d, BAR%d, off %#x\n",
					    vrop->value[0], vrop->value[1],
					    le16toh(
					      *(uint16_t *)&vrop->value[2]));
				else if (strncmp("RV", vrop->keyword, 2) == 0)
					printf("\tRV: %#hhx\n", vrop->value[0]);
				else
					printf("\t%.2s: %s\n", vrop->keyword,
					    vrop->value);
			}
			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
				struct vpd_write *vwp;
				vwp = &cfg->vpd.vpd_w[i];
				if (strncmp("RW", vwp->keyword, 2) != 0)
					printf("\t%.2s(%#x-%#x): %s\n",
					    vwp->keyword, vwp->start,
					    vwp->start + vwp->len, vwp->value);
			}
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1910 
1911 static int
1912 pci_porten(device_t pcib, int b, int s, int f)
1913 {
1914 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1915 		& PCIM_CMD_PORTEN) != 0;
1916 }
1917 
1918 static int
1919 pci_memen(device_t pcib, int b, int s, int f)
1920 {
1921 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1922 		& PCIM_CMD_MEMEN) != 0;
1923 }
1924 
1925 /*
1926  * Add a resource based on a pci map register. Return 1 if the map
1927  * register is a 32bit map register or 2 if it is a 64bit register.
1928  */
1929 static int
1930 pci_add_map(device_t pcib, device_t bus, device_t dev,
1931     int b, int s, int f, int reg, struct resource_list *rl, int force,
1932     int prefetch)
1933 {
1934 	uint32_t map;
1935 	pci_addr_t base;
1936 	pci_addr_t start, end, count;
1937 	uint8_t ln2size;
1938 	uint8_t ln2range;
1939 	uint32_t testval;
1940 	uint16_t cmd;
1941 	int type;
1942 	int barlen;
1943 	struct resource *res;
1944 
1945 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1946 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1947 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1948 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1949 
1950 	if (pci_maptype(map) & PCI_MAPMEM)
1951 		type = SYS_RES_MEMORY;
1952 	else
1953 		type = SYS_RES_IOPORT;
1954 	ln2size = pci_mapsize(testval);
1955 	ln2range = pci_maprange(testval);
1956 	base = pci_mapbase(map);
1957 	barlen = ln2range == 64 ? 2 : 1;
1958 
1959 	/*
1960 	 * For I/O registers, if bottom bit is set, and the next bit up
1961 	 * isn't clear, we know we have a BAR that doesn't conform to the
1962 	 * spec, so ignore it.  Also, sanity check the size of the data
1963 	 * areas to the type of memory involved.  Memory must be at least
1964 	 * 16 bytes in size, while I/O ranges must be at least 4.
1965 	 */
1966 	if ((testval & 0x1) == 0x1 &&
1967 	    (testval & 0x2) != 0)
1968 		return (barlen);
1969 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1970 	    (type == SYS_RES_IOPORT && ln2size < 2))
1971 		return (barlen);
1972 
1973 	if (ln2range == 64)
1974 		/* Read the other half of a 64bit map register */
1975 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
1976 	if (bootverbose) {
1977 		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
1978 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
1979 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
1980 			printf(", port disabled\n");
1981 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
1982 			printf(", memory disabled\n");
1983 		else
1984 			printf(", enabled\n");
1985 	}
1986 
1987 	/*
1988 	 * If base is 0, then we have problems.  It is best to ignore
1989 	 * such entries for the moment.  These will be allocated later if
1990 	 * the driver specifically requests them.  However, some
1991 	 * removable busses look better when all resources are allocated,
1992 	 * so allow '0' to be overriden.
1993 	 *
1994 	 * Similarly treat maps whose values is the same as the test value
1995 	 * read back.  These maps have had all f's written to them by the
1996 	 * BIOS in an attempt to disable the resources.
1997 	 */
1998 	if (!force && (base == 0 || map == testval))
1999 		return (barlen);
2000 	if ((u_long)base != base) {
2001 		device_printf(bus,
2002 		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2003 		return (barlen);
2004 	}
2005 
2006 	/*
2007 	 * This code theoretically does the right thing, but has
2008 	 * undesirable side effects in some cases where peripherals
2009 	 * respond oddly to having these bits enabled.  Let the user
2010 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2011 	 * default).
2012 	 */
2013 	if (pci_enable_io_modes) {
2014 		/* Turn on resources that have been left off by a lazy BIOS */
2015 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2016 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2017 			cmd |= PCIM_CMD_PORTEN;
2018 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2019 		}
2020 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2021 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2022 			cmd |= PCIM_CMD_MEMEN;
2023 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2024 		}
2025 	} else {
2026 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2027 			return (barlen);
2028 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2029 			return (barlen);
2030 	}
2031 
2032 	count = 1 << ln2size;
2033 	if (base == 0 || base == pci_mapbase(testval)) {
2034 		start = 0;	/* Let the parent deside */
2035 		end = ~0ULL;
2036 	} else {
2037 		start = base;
2038 		end = base + (1 << ln2size) - 1;
2039 	}
2040 	resource_list_add(rl, type, reg, start, end, count);
2041 
2042 	/*
2043 	 * Not quite sure what to do on failure of allocating the resource
2044 	 * since I can postulate several right answers.
2045 	 */
2046 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2047 	    prefetch ? RF_PREFETCHABLE : 0);
2048 	if (res == NULL)
2049 		return (barlen);
2050 	start = rman_get_start(res);
2051 	if ((u_long)start != start) {
2052 		/* Wait a minute!  this platform can't do this address. */
2053 		device_printf(bus,
2054 		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2055 		    b, s, f, reg, (uintmax_t)start);
2056 		resource_list_release(rl, bus, dev, type, reg, res);
2057 		return (barlen);
2058 	}
2059 	pci_write_config(dev, reg, start, 4);
2060 	if (ln2range == 64)
2061 		pci_write_config(dev, reg + 4, start >> 32, 4);
2062 	return (barlen);
2063 }
2064 
2065 /*
2066  * For ATA devices we need to decide early what addressing mode to use.
2067  * Legacy demands that the primary and secondary ATA ports sits on the
2068  * same addresses that old ISA hardware did. This dictates that we use
2069  * those addresses and ignore the BAR's if we cannot set PCI native
2070  * addressing mode.
2071  */
2072 static void
2073 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2074     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2075 {
2076 	int rid, type, progif;
2077 #if 0
2078 	/* if this device supports PCI native addressing use it */
2079 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2080 	if ((progif & 0x8a) == 0x8a) {
2081 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2082 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2083 			printf("Trying ATA native PCI addressing mode\n");
2084 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2085 		}
2086 	}
2087 #endif
2088 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2089 	type = SYS_RES_IOPORT;
2090 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2091 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2092 		    prefetchmask & (1 << 0));
2093 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2094 		    prefetchmask & (1 << 1));
2095 	} else {
2096 		rid = PCIR_BAR(0);
2097 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2098 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2099 		    0);
2100 		rid = PCIR_BAR(1);
2101 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2102 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2103 		    0);
2104 	}
2105 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2106 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2107 		    prefetchmask & (1 << 2));
2108 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2109 		    prefetchmask & (1 << 3));
2110 	} else {
2111 		rid = PCIR_BAR(2);
2112 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2113 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2114 		    0);
2115 		rid = PCIR_BAR(3);
2116 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2117 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2118 		    0);
2119 	}
2120 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2121 	    prefetchmask & (1 << 4));
2122 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2123 	    prefetchmask & (1 << 5));
2124 }
2125 
/*
 * Choose and install an interrupt line for a device: first honor a
 * user tunable ("hw.pci<bus>.<slot>.INT<pin>.irq"), then either the
 * value routed by the parent bus or the intline config register.
 * The result is written back to PCIR_INTLINE and registered as the
 * rid 0 IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only accept tunable values in the range 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2172 
/*
 * Populate a device's resource list from its BARs, quirk table
 * entries, and interrupt line.  'force' and 'prefetchmask' are passed
 * through to pci_add_map() for each BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    (pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV))
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the number of BAR dwords consumed
		 * (2 for a 64-bit BAR), so advance the index by that much. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2223 
2224 void
2225 pci_add_children(device_t dev, int busno, size_t dinfo_size)
2226 {
2227 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2228 	device_t pcib = device_get_parent(dev);
2229 	struct pci_devinfo *dinfo;
2230 	int maxslots;
2231 	int s, f, pcifunchigh;
2232 	uint8_t hdrtype;
2233 
2234 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2235 	    ("dinfo_size too small"));
2236 	maxslots = PCIB_MAXSLOTS(pcib);
2237 	for (s = 0; s <= maxslots; s++) {
2238 		pcifunchigh = 0;
2239 		f = 0;
2240 		DELAY(1);
2241 		hdrtype = REG(PCIR_HDRTYPE, 1);
2242 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2243 			continue;
2244 		if (hdrtype & PCIM_MFDEV)
2245 			pcifunchigh = PCI_FUNCMAX;
2246 		for (f = 0; f <= pcifunchigh; f++) {
2247 			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2248 			if (dinfo != NULL) {
2249 				pci_add_child(dev, dinfo);
2250 			}
2251 		}
2252 	}
2253 #undef REG
2254 }
2255 
2256 void
2257 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2258 {
2259 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2260 	device_set_ivars(dinfo->cfg.dev, dinfo);
2261 	resource_list_init(&dinfo->resources);
2262 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2263 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2264 	pci_print_verbose(dinfo);
2265 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2266 }
2267 
/*
 * Generic PCI bus probe.  The strongly negative priority lets more
 * specific PCI bus drivers claim the device in preference to this one.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2277 
/*
 * Attach the PCI bus: determine our bus number from the parent bridge,
 * enumerate child devices, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2297 
2298 int
2299 pci_suspend(device_t dev)
2300 {
2301 	int dstate, error, i, numdevs;
2302 	device_t acpi_dev, child, *devlist;
2303 	struct pci_devinfo *dinfo;
2304 
2305 	/*
2306 	 * Save the PCI configuration space for each child and set the
2307 	 * device in the appropriate power state for this sleep state.
2308 	 */
2309 	acpi_dev = NULL;
2310 	if (pci_do_power_resume)
2311 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2312 	device_get_children(dev, &devlist, &numdevs);
2313 	for (i = 0; i < numdevs; i++) {
2314 		child = devlist[i];
2315 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2316 		pci_cfg_save(child, dinfo, 0);
2317 	}
2318 
2319 	/* Suspend devices before potentially powering them down. */
2320 	error = bus_generic_suspend(dev);
2321 	if (error) {
2322 		free(devlist, M_TEMP);
2323 		return (error);
2324 	}
2325 
2326 	/*
2327 	 * Always set the device to D3.  If ACPI suggests a different
2328 	 * power state, use it instead.  If ACPI is not present, the
2329 	 * firmware is responsible for managing device power.  Skip
2330 	 * children who aren't attached since they are powered down
2331 	 * separately.  Only manage type 0 devices for now.
2332 	 */
2333 	for (i = 0; acpi_dev && i < numdevs; i++) {
2334 		child = devlist[i];
2335 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2336 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2337 			dstate = PCI_POWERSTATE_D3;
2338 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2339 			pci_set_powerstate(child, dstate);
2340 		}
2341 	}
2342 	free(devlist, M_TEMP);
2343 	return (0);
2344 }
2345 
2346 int
2347 pci_resume(device_t dev)
2348 {
2349 	int i, numdevs;
2350 	device_t acpi_dev, child, *devlist;
2351 	struct pci_devinfo *dinfo;
2352 
2353 	/*
2354 	 * Set each child to D0 and restore its PCI configuration space.
2355 	 */
2356 	acpi_dev = NULL;
2357 	if (pci_do_power_resume)
2358 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2359 	device_get_children(dev, &devlist, &numdevs);
2360 	for (i = 0; i < numdevs; i++) {
2361 		/*
2362 		 * Notify ACPI we're going to D0 but ignore the result.  If
2363 		 * ACPI is not present, the firmware is responsible for
2364 		 * managing device power.  Only manage type 0 devices for now.
2365 		 */
2366 		child = devlist[i];
2367 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2368 		if (acpi_dev && device_is_attached(child) &&
2369 		    dinfo->cfg.hdrtype == 0) {
2370 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2371 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2372 		}
2373 
2374 		/* Now the device is powered up, restore its config space. */
2375 		pci_cfg_restore(child, dinfo);
2376 	}
2377 	free(devlist, M_TEMP);
2378 	return (bus_generic_resume(dev));
2379 }
2380 
2381 static void
2382 pci_load_vendor_data(void)
2383 {
2384 	caddr_t vendordata, info;
2385 
2386 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2387 		info = preload_search_info(vendordata, MODINFO_ADDR);
2388 		pci_vendordata = *(char **)info;
2389 		info = preload_search_info(vendordata, MODINFO_SIZE);
2390 		pci_vendordata_size = *(size_t *)info;
2391 		/* terminate the database */
2392 		pci_vendordata[pci_vendordata_size] = '\n';
2393 	}
2394 }
2395 
2396 void
2397 pci_driver_added(device_t dev, driver_t *driver)
2398 {
2399 	int numdevs;
2400 	device_t *devlist;
2401 	device_t child;
2402 	struct pci_devinfo *dinfo;
2403 	int i;
2404 
2405 	if (bootverbose)
2406 		device_printf(dev, "driver added\n");
2407 	DEVICE_IDENTIFY(driver, dev);
2408 	device_get_children(dev, &devlist, &numdevs);
2409 	for (i = 0; i < numdevs; i++) {
2410 		child = devlist[i];
2411 		if (device_get_state(child) != DS_NOTPRESENT)
2412 			continue;
2413 		dinfo = device_get_ivars(child);
2414 		pci_print_verbose(dinfo);
2415 		if (bootverbose)
2416 			printf("pci%d:%d:%d: reprobing on driver added\n",
2417 			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2418 		pci_cfg_restore(child, dinfo);
2419 		if (device_probe_and_attach(child) != 0)
2420 			pci_cfg_save(child, dinfo, 1);
2421 	}
2422 	free(devlist, M_TEMP);
2423 }
2424 
2425 int
2426 pci_print_child(device_t dev, device_t child)
2427 {
2428 	struct pci_devinfo *dinfo;
2429 	struct resource_list *rl;
2430 	int retval = 0;
2431 
2432 	dinfo = device_get_ivars(child);
2433 	rl = &dinfo->resources;
2434 
2435 	retval += bus_print_child_header(dev, child);
2436 
2437 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2438 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2439 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2440 	if (device_get_flags(dev))
2441 		retval += printf(" flags %#x", device_get_flags(dev));
2442 
2443 	retval += printf(" at device %d.%d", pci_get_slot(child),
2444 	    pci_get_function(child));
2445 
2446 	retval += bus_print_child_footer(dev, child);
2447 
2448 	return (retval);
2449 }
2450 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a generic description for devices with no attached driver.
 * A subclass of -1 provides the fallback text for the whole class;
 * the table is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2537 
/*
 * Bus method called when no driver claims a child: print a human
 * readable description of the device, preferring the loaded vendor
 * database and falling back to the class/subclass table, then
 * optionally power the device down (hw.pci.do_power_nodriver).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		/* Print "<class>" or "<class, subclass>" as appropriate. */
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	/* Optionally conserve power on devices nobody drives. */
	if (pci_do_power_nodriver)
		pci_cfg_save(child,
		    (struct pci_devinfo *) device_get_ivars(child), 1);
	return;
}
2579 
2580 /*
2581  * Parse the PCI device database, if loaded, and return a pointer to a
2582  * description of the device.
2583  *
2584  * The database is flat text formatted as follows:
2585  *
2586  * Any line not in a valid format is ignored.
2587  * Lines are terminated with newline '\n' characters.
2588  *
2589  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2590  * the vendor name.
2591  *
2592  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2593  * - devices cannot be listed without a corresponding VENDOR line.
2594  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2595  * another TAB, then the device name.
2596  */
2597 
2598 /*
2599  * Assuming (ptr) points to the beginning of a line in the database,
2600  * return the vendor or device and description of the next entry.
2601  * The value of (vendor) or (device) inappropriate for the entry type
2602  * is set to -1.  Returns nonzero at the end of the database.
2603  *
2604  * Note that this is slightly unrobust in the face of corrupt data;
2605  * we attempt to safeguard against this by spamming the end of the
2606  * database with a newline when we initialise.
2607  */
/*
 * Parse one entry from the vendor database.  On a vendor line, *vendor
 * is set and *device stays -1; on a device line, *device is set and
 * *vendor stays -1; *desc receives up to 80 characters of description
 * (the caller's buffer must be large enough for that plus the NUL).
 * *ptr is advanced past the consumed line(s).  Returns nonzero at the
 * end of the database.
 *
 * NOTE(review): the '\n' tests when left has reached 0 rely on the
 * trailing newline that pci_load_vendor_data() appends — without that
 * safeguard they could read one byte past the database.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
2653 
2654 static char *
2655 pci_describe_device(device_t dev)
2656 {
2657 	int	vendor, device;
2658 	char	*desc, *vp, *dp, *line;
2659 
2660 	desc = vp = dp = NULL;
2661 
2662 	/*
2663 	 * If we have no vendor data, we can't do anything.
2664 	 */
2665 	if (pci_vendordata == NULL)
2666 		goto out;
2667 
2668 	/*
2669 	 * Scan the vendor data looking for this device
2670 	 */
2671 	line = pci_vendordata;
2672 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2673 		goto out;
2674 	for (;;) {
2675 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2676 			goto out;
2677 		if (vendor == pci_get_vendor(dev))
2678 			break;
2679 	}
2680 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2681 		goto out;
2682 	for (;;) {
2683 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2684 			*dp = 0;
2685 			break;
2686 		}
2687 		if (vendor != -1) {
2688 			*dp = 0;
2689 			break;
2690 		}
2691 		if (device == pci_get_device(dev))
2692 			break;
2693 	}
2694 	if (dp[0] == '\0')
2695 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2696 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2697 	    NULL)
2698 		sprintf(desc, "%s, %s", vp, dp);
2699  out:
2700 	if (vp != NULL)
2701 		free(vp, M_DEVBUF);
2702 	if (dp != NULL)
2703 		free(dp, M_DEVBUF);
2704 	return(desc);
2705 }
2706 
/*
 * Bus method to read PCI instance variables for a child device.  All
 * values are returned from the cached config registers in the child's
 * pcicfgregs.  Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR
 * (unsupported here), or ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2786 
/*
 * Bus method to write PCI instance variables for a child device.
 * Only the interrupt pin may be changed; everything else that is
 * readable via pci_read_ivar() is deliberately read-only (EINVAL).
 * Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
2818 
2819 
2820 #include "opt_ddb.h"
2821 #ifdef DDB
2822 #include <ddb/ddb.h>
2823 #include <sys/cons.h>
2824 
2825 /*
2826  * List resources based on pci map registers, used for within ddb
2827  */
2828 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line (name/unit, location, class, IDs, revision,
 * header type) per device.  Unnamed devices print as "none<N>".
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2868 #endif /* DDB */
2869 
/*
 * Lazily allocate a resource for a BAR that has no entry in the
 * device's resource list yet: size the BAR by writing all-ones and
 * reading it back, allocate a suitably sized and aligned range from
 * the parent, record it in the resource list, and program the BAR.
 * Note the probe sequence temporarily clobbers the BAR, so the saved
 * 'map' value (or the newly allocated base) is always written back
 * before returning.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* For a 64-bit BAR, fold in the upper half of the saved base. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* The requested resource type must match what the BAR decodes. */
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the newly allocated base address. */
	map = rman_get_start(res);
out:;
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2958 
2959 
2960 struct resource *
2961 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
2962 		   u_long start, u_long end, u_long count, u_int flags)
2963 {
2964 	struct pci_devinfo *dinfo = device_get_ivars(child);
2965 	struct resource_list *rl = &dinfo->resources;
2966 	struct resource_list_entry *rle;
2967 	pcicfgregs *cfg = &dinfo->cfg;
2968 
2969 	/*
2970 	 * Perform lazy resource allocation
2971 	 */
2972 	if (device_get_parent(child) == dev) {
2973 		switch (type) {
2974 		case SYS_RES_IRQ:
2975 			/*
2976 			 * Can't alloc legacy interrupt once MSI messages
2977 			 * have been allocated.
2978 			 */
2979 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
2980 			    cfg->msix.msix_alloc > 0))
2981 				return (NULL);
2982 			/*
2983 			 * If the child device doesn't have an
2984 			 * interrupt routed and is deserving of an
2985 			 * interrupt, try to assign it one.
2986 			 */
2987 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
2988 			    (cfg->intpin != 0))
2989 				pci_assign_interrupt(dev, child, 0);
2990 			break;
2991 		case SYS_RES_IOPORT:
2992 		case SYS_RES_MEMORY:
2993 			if (*rid < PCIR_BAR(cfg->nummaps)) {
2994 				/*
2995 				 * Enable the I/O mode.  We should
2996 				 * also be assigning resources too
2997 				 * when none are present.  The
2998 				 * resource_list_alloc kind of sorta does
2999 				 * this...
3000 				 */
3001 				if (PCI_ENABLE_IO(dev, child, type))
3002 					return (NULL);
3003 			}
3004 			rle = resource_list_find(rl, type, *rid);
3005 			if (rle == NULL)
3006 				return (pci_alloc_map(dev, child, type, rid,
3007 				    start, end, count, flags));
3008 			break;
3009 		}
3010 		/*
3011 		 * If we've already allocated the resource, then
3012 		 * return it now.  But first we may need to activate
3013 		 * it, since we don't allocate the resource as active
3014 		 * above.  Normally this would be done down in the
3015 		 * nexus, but since we short-circuit that path we have
3016 		 * to do its job here.  Not sure if we should free the
3017 		 * resource if it fails to activate.
3018 		 */
3019 		rle = resource_list_find(rl, type, *rid);
3020 		if (rle != NULL && rle->res != NULL) {
3021 			if (bootverbose)
3022 				device_printf(child,
3023 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3024 				    rman_get_size(rle->res), *rid, type,
3025 				    rman_get_start(rle->res));
3026 			if ((flags & RF_ACTIVE) &&
3027 			    bus_generic_activate_resource(dev, child, type,
3028 			    *rid, rle->res) != 0)
3029 				return NULL;
3030 			return (rle->res);
3031 		}
3032 	}
3033 	return (resource_list_alloc(rl, dev, child, type, rid,
3034 	    start, end, count, flags));
3035 }
3036 
3037 void
3038 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3039 {
3040 	struct pci_devinfo *dinfo;
3041 	struct resource_list *rl;
3042 	struct resource_list_entry *rle;
3043 
3044 	if (device_get_parent(child) != dev)
3045 		return;
3046 
3047 	dinfo = device_get_ivars(child);
3048 	rl = &dinfo->resources;
3049 	rle = resource_list_find(rl, type, rid);
3050 	if (rle) {
3051 		if (rle->res) {
3052 			if (rman_get_device(rle->res) != dev ||
3053 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3054 				device_printf(dev, "delete_resource: "
3055 				    "Resource still owned by child, oops. "
3056 				    "(type=%d, rid=%d, addr=%lx)\n",
3057 				    rle->type, rle->rid,
3058 				    rman_get_start(rle->res));
3059 				return;
3060 			}
3061 			bus_release_resource(dev, type, rid, rle->res);
3062 		}
3063 		resource_list_delete(rl, type, rid);
3064 	}
3065 	/*
3066 	 * Why do we turn off the PCI configuration BAR when we delete a
3067 	 * resource? -- imp
3068 	 */
3069 	pci_write_config(child, rid, 0, 4);
3070 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3071 }
3072 
3073 struct resource_list *
3074 pci_get_resource_list (device_t dev, device_t child)
3075 {
3076 	struct pci_devinfo *dinfo = device_get_ivars(child);
3077 
3078 	return (&dinfo->resources);
3079 }
3080 
3081 uint32_t
3082 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3083 {
3084 	struct pci_devinfo *dinfo = device_get_ivars(child);
3085 	pcicfgregs *cfg = &dinfo->cfg;
3086 
3087 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3088 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3089 }
3090 
3091 void
3092 pci_write_config_method(device_t dev, device_t child, int reg,
3093     uint32_t val, int width)
3094 {
3095 	struct pci_devinfo *dinfo = device_get_ivars(child);
3096 	pcicfgregs *cfg = &dinfo->cfg;
3097 
3098 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3099 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3100 }
3101 
3102 int
3103 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3104     size_t buflen)
3105 {
3106 
3107 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3108 	    pci_get_function(child));
3109 	return (0);
3110 }
3111 
3112 int
3113 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3114     size_t buflen)
3115 {
3116 	struct pci_devinfo *dinfo;
3117 	pcicfgregs *cfg;
3118 
3119 	dinfo = device_get_ivars(child);
3120 	cfg = &dinfo->cfg;
3121 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3122 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3123 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3124 	    cfg->progif);
3125 	return (0);
3126 }
3127 
3128 int
3129 pci_assign_interrupt_method(device_t dev, device_t child)
3130 {
3131 	struct pci_devinfo *dinfo = device_get_ivars(child);
3132 	pcicfgregs *cfg = &dinfo->cfg;
3133 
3134 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3135 	    cfg->intpin));
3136 }
3137 
3138 static int
3139 pci_modevent(module_t mod, int what, void *arg)
3140 {
3141 	static struct cdev *pci_cdev;
3142 
3143 	switch (what) {
3144 	case MOD_LOAD:
3145 		STAILQ_INIT(&pci_devq);
3146 		pci_generation = 0;
3147 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3148 		    "pci");
3149 		pci_load_vendor_data();
3150 		break;
3151 
3152 	case MOD_UNLOAD:
3153 		destroy_dev(pci_cdev);
3154 		break;
3155 	}
3156 
3157 	return (0);
3158 }
3159 
/*
 * Restore a device's config-space registers from the copy cached in
 * dinfo by pci_cfg_save(), e.g. after a suspend/resume cycle or a power
 * state transition.  The write order below matters: power must be
 * restored before the BARs are written back.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write back the saved BARs, then the writable type 0 header fields. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3206 
/*
 * Snapshot a device's writable config-space registers into dinfo so
 * pci_cfg_restore() can replay them later, and, if setstate is non-zero
 * and the pci_do_power_nodriver policy allows it, place the device in D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Cases deliberately fall through: each level powers down more. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* No powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3290