xref: /illumos-gate/usr/src/uts/common/io/pciex/pcieb.c (revision 9164a50bf932130cbb5097a16f6986873ce0e6e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  * Copyright 2023 Oxide Computer Company
28  */
29 
30 /*
31  * Common x86 and SPARC PCI-E to PCI bus bridge nexus driver
32  *
33  * Background
34  * ----------
35  *
36  * The PCI Express (PCIe) specification defines that all of the PCIe devices in
37  * the system are connected together in a series of different fabrics. A way to
38  * think of these fabrics is that they are small networks where there are links
39  * between different devices and switches that allow fan out or fan in of the
40  * fabric. The entry point to that fabric is called a root complex and the
41  * fabric terminates at what is called an endpoint, which is really just PCIe
42  * terminology for the common cards that are inserted into the system (HBAs,
43  * NICs, USB, NVMe, etc.).
44  *
45  * The PCIe specification states that every link on the system has a virtual
46  * PCI-to-PCI bridge. This allows PCIe devices to still be configured the same
47  * way traditional PCI devices are to the operating system and allows them to
48  * have a traditional PCI bus, device, and function associated with them, even
49  * though there is no actual shared bus. In addition, bridges are also used to
50  * connect traditional PCI and PCI-X devices into them.
51  *
52  * The PCIe specification refers to upstream and downstream ports. Upstream
53  * ports are considered closer to the root complex and downstream ports are
54  * closer to the endpoint. We can divide the devices that the bridge driver
55  * attaches to into two groups: those that are considered upstream ports, which
56  * include root complexes and parts of PCIe switches, and downstream ports,
57  * which are the other half of PCIe switches and endpoints (which this driver
58  * does not attach to; normal hardware-specific or class-specific drivers do).
59  *
60  * Interrupt Management
61  * --------------------
62  *
63  * Upstream ports of bridges have additional things that we care about.
64  * Specifically they're the means through which we find out about:
65  *
66  *  - Advanced Error Reporting (AERs)
67  *  - Hotplug events
68  *  - Link Bandwidth Events
69  *  - Power Management Events (PME)
70  *
71  * Each of these features is an optional feature (though ones we hope are
72  * implemented). The features above are grouped into two different buckets based
73  * on which PCI capability they appear in. AER management is done through a PCI
74  * Express extended configuration header (it lives in extended PCI configuration
75  * space) called the 'Advanced Error Reporting Extended Capability'. The other
76  * events are all managed as part of the 'PCI Express Capability Structure'.
77  * This structure is found in traditional PCI configuration space.
78  *
79  * The way that the interrupts are programmed for these types of events differs
80  * a bit from the way one might expect a normal device to operate. For most
81  * devices, one allocates a number of interrupts based on a combination of what
82  * the device supports, what the OS supports per device, and the number the
83  * driver needs. Then the driver programs the device in a device-specific manner
84  * to indicate which events should trigger a specific interrupt vector.
85  *
86  * However, for both the AER and PCI capabilities, the driver has to do
87  * something different. The driver first allocates interrupts by programming the
88  * MSI or MSI-X table and then asks the device which interrupts have been
89  * assigned to these purposes. Because these events are only supported in
90  * 'upstream' devices, this does not interfere with the traditional management
91  * of MSI and MSI-X interrupts. At this time, the pcieb driver only supports the
92  * use of MSI interrupts.
93  *
94  * Once the interrupts have been allocated, we read back which vectors have been
95  * nominated by the device to cover the corresponding capability. The interrupt
96  * is allocated on a per-capability basis. Therefore, one interrupt would cover
97  * AERs, while another interrupt would cover the rest of the desired functions.
98  * Importantly, there is no guarantee that a bridge supports more than one
99  * vector; in particular, at least some AMD bridges do not.  In this case,
100  * interrupts associated with all the available capabilities will be routed to
101  * the same shared vector.
102  *
103  * To track which interrupts cover which behaviors, each driver state
104  * (pcieb_devstate_t) has a member called 'pcieb_isr_tab'. Each index represents
105  * an interrupt vector and there are a series of flags that represent the
106  * different possible interrupt sources: PCIEB_INTR_SRC_HP (hotplug),
107  * PCIEB_INTR_SRC_PME (power management event), PCIEB_INTR_SRC_AER (error
108  * reporting), PCIEB_INTR_SRC_LBW (link bandwidth).
109  *
110  * Because the hotplug, link bandwidth, and power management events all share
111  * the same vector, if an interrupt comes in, we must check all of the enabled
112  * sources that might generate this interrupt. It is highly likely that more
113  * than one will fire at the same time, for example, a hotplug event that fires
114  * because a device has been inserted or removed, will likely trigger a link
115  * bandwidth event.
116  *
117  * The pcieb driver itself does not actually have much logic to deal with and
118  * clear the interrupts in question. It generally speaking will vector most
119  * events back to the more general pcie driver or, in the case of AERs, initiate
120  * a scan of the fabric itself (also part of the pcie driver).
121  *
122  * Link Management
123  * ---------------
124  *
125  * The pcieb driver is used to take care of two different aspects of link
126  * management. The first of these, as described briefly above, is to monitor for
127  * changes to the negotiated link bandwidth. These events are managed by
128  * enabling support for the interrupts in the PCI Express Capability Structure.
129  * This is all taken care of by the pcie driver through functions like
130  * pcie_link_bw_enable().
131  *
132  * The second aspect of link management the pcieb driver enables is the ability
133  * to retrain the link and optionally limit the speed. This is enabled through a
134  * series of private ioctls that are driven through a private userland utility,
135  * /usr/lib/pci/pcieb. Eventually, this should be more fleshed out and a more
136  * uniform interface based around the devctls that can be leveraged across
137  * different classes of devices should be used.
138  *
139  * Under the hood this basically leverages the ability of the upstream port to
140  * retrain a link by writing a bit to the PCIe link control register. See
141  * pcieb_ioctl_retrain(). From there, if the driver ever receives a request to
142  * change the maximum speed, that is updated in the card; however, it does not
143  * immediately retrain the link. A separate ioctl request is required to do so.
144  * Once the speed has been changed, regardless of whether or not it has been
145  * retrained, that fact will always be noted.
146  */
147 
148 #include <sys/sysmacros.h>
149 #include <sys/conf.h>
150 #include <sys/kmem.h>
151 #include <sys/debug.h>
152 #include <sys/modctl.h>
153 #include <sys/autoconf.h>
154 #include <sys/ddi_impldefs.h>
155 #include <sys/pci.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/fm/util.h>
160 #include <sys/pci_cap.h>
161 #include <sys/pci_impl.h>
162 #include <sys/pcie_impl.h>
163 #include <sys/open.h>
164 #include <sys/stat.h>
165 #include <sys/file.h>
166 #include <sys/promif.h>		/* prom_printf */
167 #include <sys/disp.h>
168 #include <sys/pcie_pwr.h>
169 #include <sys/hotplug/pci/pcie_hp.h>
170 #include "pcieb.h"
171 #include "pcieb_ioctl.h"
172 #ifdef PX_PLX
173 #include <io/pciex/pcieb_plx.h>
174 #endif /* PX_PLX */
175 
176 /*LINTLIBRARY*/
177 
178 /* panic flag */
179 int pcieb_die = PF_ERR_FATAL_FLAGS;
180 int pcieb_disable_41210_wkarnd = 0;
181 
182 /* flag to turn on MSI support */
183 int pcieb_enable_msi = 1;
184 
185 #if defined(DEBUG)
186 uint_t pcieb_dbg_print = 0;
187 
188 static char *pcieb_debug_sym [] = {	/* same sequence as pcieb_debug_bit */
189 	/*  0 */ "attach",
190 	/*  1 */ "pwr",
191 	/*  2 */ "intr"
192 };
193 #endif /* DEBUG */
194 
195 static int pcieb_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *, off_t,
196 	off_t, caddr_t *);
197 static int pcieb_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
198 	void *);
199 static int pcieb_fm_init(pcieb_devstate_t *pcieb_p);
200 static void pcieb_fm_fini(pcieb_devstate_t *pcieb_p);
201 static int pcieb_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
202     ddi_iblock_cookie_t *ibc_p);
203 static int pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
204 	ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
205 	ddi_dma_handle_t *handlep);
206 static int pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip,
207 	ddi_dma_handle_t handle, enum ddi_dma_ctlops cmd, off_t *offp,
208 	size_t *lenp, caddr_t *objp, uint_t cache_flags);
209 static int pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip,
210 	ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
211 
212 static struct bus_ops pcieb_bus_ops = {
213 	BUSO_REV,
214 	pcieb_bus_map,
215 	0,
216 	0,
217 	0,
218 	i_ddi_map_fault,
219 	0,
220 	pcieb_dma_allochdl,
221 	ddi_dma_freehdl,
222 	ddi_dma_bindhdl,
223 	ddi_dma_unbindhdl,
224 	ddi_dma_flush,
225 	ddi_dma_win,
226 	pcieb_dma_mctl,
227 	pcieb_ctlops,
228 	ddi_bus_prop_op,
229 	ndi_busop_get_eventcookie,	/* (*bus_get_eventcookie)();	*/
230 	ndi_busop_add_eventcall,	/* (*bus_add_eventcall)();	*/
231 	ndi_busop_remove_eventcall,	/* (*bus_remove_eventcall)();	*/
232 	ndi_post_event,			/* (*bus_post_event)();		*/
233 	NULL,				/* (*bus_intr_ctl)();		*/
234 	NULL,				/* (*bus_config)();		*/
235 	NULL,				/* (*bus_unconfig)();		*/
236 	pcieb_fm_init_child,		/* (*bus_fm_init)();		*/
237 	NULL,				/* (*bus_fm_fini)();		*/
238 	i_ndi_busop_access_enter,	/* (*bus_fm_access_enter)();	*/
239 	i_ndi_busop_access_exit,	/* (*bus_fm_access_exit)();	*/
240 	pcie_bus_power,			/* (*bus_power)();		*/
241 	pcieb_intr_ops,			/* (*bus_intr_op)();		*/
242 	pcie_hp_common_ops		/* (*bus_hp_op)();		*/
243 };
244 
245 static int	pcieb_open(dev_t *, int, int, cred_t *);
246 static int	pcieb_close(dev_t, int, int, cred_t *);
247 static int	pcieb_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
248 static int	pcieb_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
249 static uint_t	pcieb_intr_handler(caddr_t arg1, caddr_t arg2);
250 
251 /* PM related functions */
252 static int	pcieb_pwr_setup(dev_info_t *dip);
253 static int	pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p);
254 static void	pcieb_pwr_teardown(dev_info_t *dip);
255 static int	pcieb_pwr_disable(dev_info_t *dip);
256 
257 /* Hotplug related functions */
258 static void pcieb_id_props(pcieb_devstate_t *pcieb);
259 
260 /*
261  * soft state pointer
262  */
263 void *pcieb_state;
264 
265 static struct cb_ops pcieb_cb_ops = {
266 	pcieb_open,			/* open */
267 	pcieb_close,			/* close */
268 	nodev,				/* strategy */
269 	nodev,				/* print */
270 	nodev,				/* dump */
271 	nodev,				/* read */
272 	nodev,				/* write */
273 	pcieb_ioctl,			/* ioctl */
274 	nodev,				/* devmap */
275 	nodev,				/* mmap */
276 	nodev,				/* segmap */
277 	nochpoll,			/* poll */
278 	pcie_prop_op,			/* cb_prop_op */
279 	NULL,				/* streamtab */
280 	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
281 	CB_REV,				/* rev */
282 	nodev,				/* int (*cb_aread)() */
283 	nodev				/* int (*cb_awrite)() */
284 };
285 
286 static int	pcieb_probe(dev_info_t *);
287 static int	pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
288 static int	pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
289 
290 static struct dev_ops pcieb_ops = {
291 	DEVO_REV,		/* devo_rev */
292 	0,			/* refcnt  */
293 	pcieb_info,		/* info */
294 	nulldev,		/* identify */
295 	pcieb_probe,		/* probe */
296 	pcieb_attach,		/* attach */
297 	pcieb_detach,		/* detach */
298 	nulldev,		/* reset */
299 	&pcieb_cb_ops,		/* driver operations */
300 	&pcieb_bus_ops,		/* bus operations */
301 	pcie_power,		/* power */
302 	ddi_quiesce_not_needed,		/* quiesce */
303 };
304 
305 /*
306  * Module linkage information for the kernel.
307  */
308 
309 static struct modldrv modldrv = {
310 	&mod_driverops, /* Type of module */
311 	"PCIe bridge/switch driver",
312 	&pcieb_ops,	/* driver ops */
313 };
314 
315 static struct modlinkage modlinkage = {
316 	MODREV_1,
317 	(void *)&modldrv,
318 	NULL
319 };
320 
321 /*
322  * forward function declarations:
323  */
324 static void	pcieb_uninitchild(dev_info_t *);
325 static int	pcieb_initchild(dev_info_t *child);
326 static void	pcieb_create_ranges_prop(dev_info_t *, ddi_acc_handle_t);
327 static boolean_t pcieb_is_pcie_device_type(dev_info_t *dip);
328 
329 /* interrupt related declarations */
330 static int	pcieb_msi_supported(dev_info_t *);
331 static int	pcieb_intr_attach(pcieb_devstate_t *pcieb);
332 static int	pcieb_intr_init(pcieb_devstate_t *pcieb_p, int intr_type);
333 static void	pcieb_intr_fini(pcieb_devstate_t *pcieb_p);
334 
335 int
336 _init(void)
337 {
338 	int e;
339 
340 	if ((e = ddi_soft_state_init(&pcieb_state, sizeof (pcieb_devstate_t),
341 	    1)) == 0 && (e = mod_install(&modlinkage)) != 0)
342 		ddi_soft_state_fini(&pcieb_state);
343 	return (e);
344 }
345 
346 int
347 _fini(void)
348 {
349 	int e;
350 
351 	if ((e = mod_remove(&modlinkage)) == 0) {
352 		ddi_soft_state_fini(&pcieb_state);
353 	}
354 	return (e);
355 }
356 
357 int
358 _info(struct modinfo *modinfop)
359 {
360 	return (mod_info(&modlinkage, modinfop));
361 }
362 
363 /* ARGSUSED */
364 static int
365 pcieb_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
366 {
367 	minor_t		minor = getminor((dev_t)arg);
368 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
369 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state, instance);
370 	int		ret = DDI_SUCCESS;
371 
372 	switch (infocmd) {
373 	case DDI_INFO_DEVT2INSTANCE:
374 		*result = (void *)(intptr_t)instance;
375 		break;
376 	case DDI_INFO_DEVT2DEVINFO:
377 		if (pcieb == NULL) {
378 			ret = DDI_FAILURE;
379 			break;
380 		}
381 
382 		*result = (void *)pcieb->pcieb_dip;
383 		break;
384 	default:
385 		ret = DDI_FAILURE;
386 		break;
387 	}
388 
389 	return (ret);
390 }
391 
392 
/*
 * Probe entry point (installed in pcieb_ops). No device-specific probing is
 * done here: the routine unconditionally reports DDI_PROBE_SUCCESS and the
 * real checks on the device happen in pcieb_attach().
 */
/*ARGSUSED*/
static int
pcieb_probe(dev_info_t *devi)
{
	return (DDI_PROBE_SUCCESS);
}
399 
400 /*
401  * This is a workaround for an undocumented HW erratum with the
402  * multi-function, F0 and F2, Intel 41210 PCIe-to-PCI bridge. When
403  * Fn (cdip) attaches, this workaround is called to initialize Fn's
404  * sibling (sdip) with MPS/MRRS if it isn't already configured.
405  * Doing so prevents a malformed TLP panic.
406  */
407 static void
408 pcieb_41210_mps_wkrnd(dev_info_t *cdip)
409 {
410 	dev_info_t *sdip;
411 	ddi_acc_handle_t cfg_hdl;
412 	uint16_t cdip_dev_ctrl, cdip_mrrs_mps;
413 	pcie_bus_t *cdip_bus_p = PCIE_DIP2BUS(cdip);
414 
415 	/* Get cdip's MPS/MRRS already setup by pcie_initchild_mps() */
416 	ASSERT(cdip_bus_p);
417 	cdip_dev_ctrl  = PCIE_CAP_GET(16, cdip_bus_p, PCIE_DEVCTL);
418 	cdip_mrrs_mps  = cdip_dev_ctrl &
419 	    (PCIE_DEVCTL_MAX_READ_REQ_MASK | PCIE_DEVCTL_MAX_PAYLOAD_MASK);
420 
421 	/* Locate sdip and set its MPS/MRRS when applicable */
422 	for (sdip = ddi_get_child(ddi_get_parent(cdip)); sdip;
423 	    sdip = ddi_get_next_sibling(sdip)) {
424 		uint16_t sdip_dev_ctrl, sdip_mrrs_mps, cap_ptr;
425 		uint32_t bus_dev_ven_id;
426 
427 		if (sdip == cdip || pci_config_setup(sdip, &cfg_hdl)
428 		    != DDI_SUCCESS)
429 			continue;
430 
431 		/* must be an Intel 41210 bridge */
432 		bus_dev_ven_id = pci_config_get32(cfg_hdl, PCI_CONF_VENID);
433 		if (!PCIEB_IS_41210_BRIDGE(bus_dev_ven_id)) {
434 			pci_config_teardown(&cfg_hdl);
435 			continue;
436 		}
437 
438 		if (PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)
439 		    != DDI_SUCCESS) {
440 			pci_config_teardown(&cfg_hdl);
441 			continue;
442 		}
443 
444 		/* get sdip's MPS/MRRS to compare to cdip's */
445 		sdip_dev_ctrl = PCI_CAP_GET16(cfg_hdl, 0, cap_ptr,
446 		    PCIE_DEVCTL);
447 		sdip_mrrs_mps = sdip_dev_ctrl &
448 		    (PCIE_DEVCTL_MAX_READ_REQ_MASK |
449 		    PCIE_DEVCTL_MAX_PAYLOAD_MASK);
450 
451 		/* if sdip already attached then its MPS/MRRS is configured */
452 		if (i_ddi_devi_attached(sdip)) {
453 			ASSERT(sdip_mrrs_mps == cdip_mrrs_mps);
454 			pci_config_teardown(&cfg_hdl);
455 			continue;
456 		}
457 
458 		/* otherwise, update sdip's MPS/MRRS if different from cdip's */
459 		if (sdip_mrrs_mps != cdip_mrrs_mps) {
460 			sdip_dev_ctrl = (sdip_dev_ctrl &
461 			    ~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
462 			    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) | cdip_mrrs_mps;
463 
464 			(void) PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL,
465 			    sdip_dev_ctrl);
466 		}
467 
468 		/*
469 		 * note: sdip's bus_mps will be updated by
470 		 * pcie_initchild_mps()
471 		 */
472 
473 		pci_config_teardown(&cfg_hdl);
474 
475 		break;
476 	}
477 }
478 
479 static int
480 pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
481 {
482 	int			instance;
483 	char			device_type[8];
484 	pcieb_devstate_t	*pcieb;
485 	pcie_bus_t		*bus_p = PCIE_DIP2UPBUS(devi);
486 	ddi_acc_handle_t	config_handle = bus_p->bus_cfg_hdl;
487 
488 	switch (cmd) {
489 	case DDI_RESUME:
490 		(void) pcie_pwr_resume(devi);
491 		return (DDI_SUCCESS);
492 
493 	default:
494 		return (DDI_FAILURE);
495 
496 	case DDI_ATTACH:
497 		break;
498 	}
499 
500 	if (!(PCIE_IS_BDG(bus_p))) {
501 		PCIEB_DEBUG(DBG_ATTACH, devi, "This is not a switch or"
502 		" bridge\n");
503 		return (DDI_FAILURE);
504 	}
505 
506 	/*
507 	 * If PCIE_LINKCTL_LINK_DISABLE bit in the PCIe Config
508 	 * Space (PCIe Capability Link Control Register) is set,
509 	 * then do not bind the driver.
510 	 */
511 	if (PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL) & PCIE_LINKCTL_LINK_DISABLE)
512 		return (DDI_FAILURE);
513 
514 	/*
515 	 * Allocate and get soft state structure.
516 	 */
517 	instance = ddi_get_instance(devi);
518 	if (ddi_soft_state_zalloc(pcieb_state, instance) != DDI_SUCCESS)
519 		return (DDI_FAILURE);
520 	pcieb = ddi_get_soft_state(pcieb_state, instance);
521 	pcieb->pcieb_dip = devi;
522 
523 	if ((pcieb_fm_init(pcieb)) != DDI_SUCCESS) {
524 		PCIEB_DEBUG(DBG_ATTACH, devi, "Failed in pcieb_fm_init\n");
525 		goto fail;
526 	}
527 	pcieb->pcieb_init_flags |= PCIEB_INIT_FM;
528 
529 	mutex_init(&pcieb->pcieb_mutex, NULL, MUTEX_DRIVER, NULL);
530 	mutex_init(&pcieb->pcieb_err_mutex, NULL, MUTEX_DRIVER,
531 	    (void *)pcieb->pcieb_fm_ibc);
532 	mutex_init(&pcieb->pcieb_peek_poke_mutex, NULL, MUTEX_DRIVER,
533 	    (void *)pcieb->pcieb_fm_ibc);
534 
535 	/* create special properties for device identification */
536 	pcieb_id_props(pcieb);
537 
538 	/*
539 	 * Power management setup. This also makes sure that switch/bridge
540 	 * is at D0 during attach.
541 	 */
542 	if (pwr_common_setup(devi) != DDI_SUCCESS) {
543 		PCIEB_DEBUG(DBG_PWR, devi, "pwr_common_setup failed\n");
544 		goto fail;
545 	}
546 
547 	if (pcieb_pwr_setup(devi) != DDI_SUCCESS) {
548 		PCIEB_DEBUG(DBG_PWR, devi, "pxb_pwr_setup failed \n");
549 		goto fail;
550 	}
551 
552 	/*
553 	 * Make sure the "device_type" property exists.
554 	 */
555 	if (pcieb_is_pcie_device_type(devi))
556 		(void) strcpy(device_type, "pciex");
557 	else
558 		(void) strcpy(device_type, "pci");
559 
560 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
561 	    "device_type", device_type);
562 
563 	/*
564 	 * Check whether the "ranges" property is present.
565 	 * Otherwise create the ranges property by reading
566 	 * the configuration registers
567 	 */
568 	if (ddi_prop_exists(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
569 	    "ranges") == 0) {
570 		pcieb_create_ranges_prop(devi, config_handle);
571 	}
572 
573 	if (PCIE_IS_PCI_BDG(bus_p))
574 		pcieb_set_pci_perf_parameters(devi, config_handle);
575 
576 #ifdef PX_PLX
577 	pcieb_attach_plx_workarounds(pcieb);
578 #endif /* PX_PLX */
579 
580 	if (pcie_init(devi, NULL) != DDI_SUCCESS)
581 		goto fail;
582 
583 	/* Intel PCIe-to-PCI 41210 bridge workaround -- if applicable */
584 	if (pcieb_disable_41210_wkarnd == 0 &&
585 	    PCIEB_IS_41210_BRIDGE(bus_p->bus_dev_ven_id))
586 		pcieb_41210_mps_wkrnd(devi);
587 
588 	/*
589 	 * Initialize interrupt handlers. Ignore return value.
590 	 */
591 	(void) pcieb_intr_attach(pcieb);
592 
593 	(void) pcie_hpintr_enable(devi);
594 
595 	(void) pcie_link_bw_enable(devi);
596 
597 	/* Do any platform specific workarounds needed at this time */
598 	pcieb_plat_attach_workaround(devi);
599 
600 	/*
601 	 * If this is a root port, we need to go through and at this point in
602 	 * time set up and initialize all fabric-wide settings such as the max
603 	 * packet size, tagging, etc. Since this will involve scanning the
604 	 * fabric, all error enabling and sw workarounds should be in place
605 	 * before doing this. For hotplug-capable bridges, this will happen
606 	 * again when a hotplug event occurs. See the pcie theory statement in
607 	 * uts/common/io/pciex/pcie.c for more information.
608 	 */
609 	if (PCIE_IS_RP(bus_p))
610 		pcie_fabric_setup(devi);
611 
612 	ddi_report_dev(devi);
613 	return (DDI_SUCCESS);
614 
615 fail:
616 	(void) pcieb_detach(devi, DDI_DETACH);
617 	return (DDI_FAILURE);
618 }
619 
620 static int
621 pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
622 {
623 	pcieb_devstate_t *pcieb;
624 	int error = DDI_SUCCESS;
625 
626 	switch (cmd) {
627 	case DDI_SUSPEND:
628 		error = pcie_pwr_suspend(devi);
629 		return (error);
630 
631 	case DDI_DETACH:
632 		break;
633 
634 	default:
635 		return (DDI_FAILURE);
636 	}
637 
638 	pcieb = ddi_get_soft_state(pcieb_state, ddi_get_instance(devi));
639 
640 	/* disable hotplug interrupt */
641 	(void) pcie_hpintr_disable(devi);
642 
643 	/* remove interrupt handlers */
644 	pcieb_intr_fini(pcieb);
645 
646 	/* uninitialize inband PCI-E HPC if present */
647 	(void) pcie_uninit(devi);
648 
649 	(void) ddi_prop_remove(DDI_DEV_T_NONE, devi, "device_type");
650 
651 	(void) ndi_prop_remove(DDI_DEV_T_NONE, pcieb->pcieb_dip,
652 	    "pcie_ce_mask");
653 
654 	if (pcieb->pcieb_init_flags & PCIEB_INIT_FM)
655 		pcieb_fm_fini(pcieb);
656 
657 	pcieb_pwr_teardown(devi);
658 	pwr_common_teardown(devi);
659 
660 	mutex_destroy(&pcieb->pcieb_peek_poke_mutex);
661 	mutex_destroy(&pcieb->pcieb_err_mutex);
662 	mutex_destroy(&pcieb->pcieb_mutex);
663 
664 	/*
665 	 * And finally free the per-pci soft state.
666 	 */
667 	ddi_soft_state_free(pcieb_state, ddi_get_instance(devi));
668 
669 	return (DDI_SUCCESS);
670 }
671 
672 static int
673 pcieb_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
674     off_t offset, off_t len, caddr_t *vaddrp)
675 {
676 	dev_info_t *pdip;
677 
678 	if (PCIE_IS_RP(PCIE_DIP2BUS(dip)) && mp->map_handlep != NULL) {
679 		ddi_acc_impl_t *hdlp =
680 		    (ddi_acc_impl_t *)(mp->map_handlep)->ah_platform_private;
681 
682 		pcieb_set_prot_scan(dip, hdlp);
683 	}
684 	pdip = (dev_info_t *)DEVI(dip)->devi_parent;
685 	return ((DEVI(pdip)->devi_ops->devo_bus_ops->bus_map)(pdip, rdip, mp,
686 	    offset, len, vaddrp));
687 }
688 
689 static int
690 pcieb_ctlops(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
691     void *arg, void *result)
692 {
693 	pci_regspec_t *drv_regp;
694 	int	reglen;
695 	int	rn;
696 	int	totreg;
697 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state,
698 	    ddi_get_instance(dip));
699 	struct detachspec *ds;
700 	struct attachspec *as;
701 
702 	switch (ctlop) {
703 	case DDI_CTLOPS_REPORTDEV:
704 		if (rdip == (dev_info_t *)0)
705 			return (DDI_FAILURE);
706 
707 		if (ddi_get_parent(rdip) == dip) {
708 			cmn_err(CE_CONT, "?PCIE-device: %s@%s, %s%d\n",
709 			    ddi_node_name(rdip), ddi_get_name_addr(rdip),
710 			    ddi_driver_name(rdip), ddi_get_instance(rdip));
711 		}
712 
713 		/* Pass it up for fabric sync */
714 		(void) ddi_ctlops(dip, rdip, ctlop, arg, result);
715 		return (DDI_SUCCESS);
716 
717 	case DDI_CTLOPS_INITCHILD:
718 		return (pcieb_initchild((dev_info_t *)arg));
719 
720 	case DDI_CTLOPS_UNINITCHILD:
721 		pcieb_uninitchild((dev_info_t *)arg);
722 		return (DDI_SUCCESS);
723 
724 	case DDI_CTLOPS_SIDDEV:
725 		return (DDI_SUCCESS);
726 
727 	case DDI_CTLOPS_REGSIZE:
728 	case DDI_CTLOPS_NREGS:
729 		if (rdip == (dev_info_t *)0)
730 			return (DDI_FAILURE);
731 		break;
732 
733 	case DDI_CTLOPS_PEEK:
734 	case DDI_CTLOPS_POKE:
735 		return (pcieb_plat_peekpoke(dip, rdip, ctlop, arg, result));
736 	case DDI_CTLOPS_ATTACH:
737 		if (!pcie_is_child(dip, rdip))
738 			return (DDI_SUCCESS);
739 
740 		as = (struct attachspec *)arg;
741 		switch (as->when) {
742 		case DDI_PRE:
743 			if (as->cmd == DDI_RESUME) {
744 				pcie_clear_errors(rdip);
745 				if (pcieb_plat_ctlops(rdip, ctlop, arg) !=
746 				    DDI_SUCCESS)
747 					return (DDI_FAILURE);
748 			}
749 
750 			if (as->cmd == DDI_ATTACH)
751 				return (pcie_pm_hold(dip));
752 
753 			return (DDI_SUCCESS);
754 
755 		case DDI_POST:
756 			if (as->cmd == DDI_ATTACH &&
757 			    as->result != DDI_SUCCESS) {
758 				/*
759 				 * Attach failed for the child device. The child
760 				 * driver may have made PM calls before the
761 				 * attach failed. pcie_pm_remove_child() should
762 				 * cleanup PM state and holds (if any)
763 				 * associated with the child device.
764 				 */
765 				return (pcie_pm_remove_child(dip, rdip));
766 			}
767 
768 			if (as->result == DDI_SUCCESS) {
769 				pf_init(rdip, (void *)pcieb->pcieb_fm_ibc,
770 				    as->cmd);
771 
772 				(void) pcieb_plat_ctlops(rdip, ctlop, arg);
773 			}
774 
775 			/*
776 			 * For empty hotplug-capable slots, we should explicitly
777 			 * disable the errors, so that we won't panic upon
778 			 * unsupported hotplug messages.
779 			 */
780 			if ((!ddi_prop_exists(DDI_DEV_T_ANY, rdip,
781 			    DDI_PROP_DONTPASS, "hotplug-capable")) ||
782 			    ddi_get_child(rdip)) {
783 				(void) pcie_postattach_child(rdip);
784 				return (DDI_SUCCESS);
785 			}
786 
787 			pcie_disable_errors(rdip);
788 
789 			return (DDI_SUCCESS);
790 		default:
791 			break;
792 		}
793 		return (DDI_SUCCESS);
794 
795 	case DDI_CTLOPS_DETACH:
796 		if (!pcie_is_child(dip, rdip))
797 			return (DDI_SUCCESS);
798 
799 		ds = (struct detachspec *)arg;
800 		switch (ds->when) {
801 		case DDI_PRE:
802 			pf_fini(rdip, ds->cmd);
803 			return (DDI_SUCCESS);
804 
805 		case DDI_POST:
806 			if (pcieb_plat_ctlops(rdip, ctlop, arg) != DDI_SUCCESS)
807 				return (DDI_FAILURE);
808 			if (ds->cmd == DDI_DETACH &&
809 			    ds->result == DDI_SUCCESS) {
810 				return (pcie_pm_remove_child(dip, rdip));
811 			}
812 			return (DDI_SUCCESS);
813 		default:
814 			break;
815 		}
816 		return (DDI_SUCCESS);
817 	default:
818 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
819 	}
820 
821 	*(int *)result = 0;
822 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip,
823 	    DDI_PROP_DONTPASS | DDI_PROP_CANSLEEP, "reg", (caddr_t)&drv_regp,
824 	    &reglen) != DDI_SUCCESS)
825 		return (DDI_FAILURE);
826 
827 	totreg = reglen / sizeof (pci_regspec_t);
828 	if (ctlop == DDI_CTLOPS_NREGS)
829 		*(int *)result = totreg;
830 	else if (ctlop == DDI_CTLOPS_REGSIZE) {
831 		rn = *(int *)arg;
832 		if (rn >= totreg) {
833 			kmem_free(drv_regp, reglen);
834 			return (DDI_FAILURE);
835 		}
836 
837 		*(off_t *)result = drv_regp[rn].pci_size_low |
838 		    ((uint64_t)drv_regp[rn].pci_size_hi << 32);
839 	}
840 
841 	kmem_free(drv_regp, reglen);
842 	return (DDI_SUCCESS);
843 }
844 
845 /*
846  * name_child
847  *
848  * This function is called from init_child to name a node. It is
849  * also passed as a callback for node merging functions.
850  *
851  * return value: DDI_SUCCESS, DDI_FAILURE
852  */
853 static int
854 pcieb_name_child(dev_info_t *child, char *name, int namelen)
855 {
856 	pci_regspec_t *pci_rp;
857 	uint_t device, func;
858 	char **unit_addr;
859 	uint_t n;
860 
861 	/*
862 	 * For .conf nodes, use unit-address property as name
863 	 */
864 	if (ndi_dev_is_persistent_node(child) == 0) {
865 		if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, child,
866 		    DDI_PROP_DONTPASS, "unit-address", &unit_addr, &n) !=
867 		    DDI_PROP_SUCCESS) {
868 			cmn_err(CE_WARN,
869 			    "cannot find unit-address in %s.conf",
870 			    ddi_driver_name(child));
871 			return (DDI_FAILURE);
872 		}
873 		if (n != 1 || *unit_addr == NULL || **unit_addr == 0) {
874 			cmn_err(CE_WARN, "unit-address property in %s.conf"
875 			    " not well-formed", ddi_driver_name(child));
876 			ddi_prop_free(unit_addr);
877 			return (DDI_FAILURE);
878 		}
879 		(void) snprintf(name, namelen, "%s", *unit_addr);
880 		ddi_prop_free(unit_addr);
881 		return (DDI_SUCCESS);
882 	}
883 
884 	/*
885 	 * Get the address portion of the node name based on
886 	 * the function and device number.
887 	 */
888 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child,
889 	    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &n) != DDI_SUCCESS) {
890 		return (DDI_FAILURE);
891 	}
892 
893 	/* copy the device identifications */
894 	device = PCI_REG_DEV_G(pci_rp[0].pci_phys_hi);
895 	func = PCI_REG_FUNC_G(pci_rp[0].pci_phys_hi);
896 
897 	if (pcie_ari_is_enabled(ddi_get_parent(child))
898 	    == PCIE_ARI_FORW_ENABLED) {
899 		func = (device << 3) | func;
900 		device = 0;
901 	}
902 
903 	if (func != 0)
904 		(void) snprintf(name, namelen, "%x,%x", device, func);
905 	else
906 		(void) snprintf(name, namelen, "%x", device);
907 
908 	ddi_prop_free(pci_rp);
909 	return (DDI_SUCCESS);
910 }
911 
912 static int
913 pcieb_initchild(dev_info_t *child)
914 {
915 	char name[MAXNAMELEN];
916 	int result = DDI_FAILURE;
917 	pcieb_devstate_t *pcieb =
918 	    (pcieb_devstate_t *)ddi_get_soft_state(pcieb_state,
919 	    ddi_get_instance(ddi_get_parent(child)));
920 
921 	/*
922 	 * Name the child
923 	 */
924 	if (pcieb_name_child(child, name, MAXNAMELEN) != DDI_SUCCESS) {
925 		result = DDI_FAILURE;
926 		goto done;
927 	}
928 	ddi_set_name_addr(child, name);
929 
930 	/*
931 	 * Pseudo nodes indicate a prototype node with per-instance
932 	 * properties to be merged into the real h/w device node.
933 	 * The interpretation of the unit-address is DD[,F]
934 	 * where DD is the device id and F is the function.
935 	 */
936 	if (ndi_dev_is_persistent_node(child) == 0) {
937 		extern int pci_allow_pseudo_children;
938 
939 		/*
940 		 * Try to merge the properties from this prototype
941 		 * node into real h/w nodes.
942 		 */
943 		if (ndi_merge_node(child, pcieb_name_child) == DDI_SUCCESS) {
944 			/*
945 			 * Merged ok - return failure to remove the node.
946 			 */
947 			ddi_set_name_addr(child, NULL);
948 			result = DDI_FAILURE;
949 			goto done;
950 		}
951 
952 		/* workaround for ddivs to run under PCI-E */
953 		if (pci_allow_pseudo_children) {
954 			result = DDI_SUCCESS;
955 			goto done;
956 		}
957 
958 		/*
959 		 * The child was not merged into a h/w node,
960 		 * but there's not much we can do with it other
961 		 * than return failure to cause the node to be removed.
962 		 */
963 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
964 		    ddi_driver_name(child), ddi_get_name_addr(child),
965 		    ddi_driver_name(child));
966 		ddi_set_name_addr(child, NULL);
967 		result = DDI_NOT_WELL_FORMED;
968 		goto done;
969 	}
970 
971 	/* platform specific initchild */
972 	pcieb_plat_initchild(child);
973 
974 	if (pcie_pm_hold(pcieb->pcieb_dip) != DDI_SUCCESS) {
975 		PCIEB_DEBUG(DBG_PWR, pcieb->pcieb_dip,
976 		    "INITCHILD: px_pm_hold failed\n");
977 		result = DDI_FAILURE;
978 		goto done;
979 	}
980 	/* Any return from here must call pcie_pm_release */
981 
982 	/*
983 	 * If configuration registers were previously saved by
984 	 * child (before it entered D3), then let the child do the
985 	 * restore to set up the config regs as it'll first need to
986 	 * power the device out of D3.
987 	 */
988 	if (ddi_prop_exists(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
989 	    "config-regs-saved-by-child") == 1) {
990 		PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
991 		    "INITCHILD: config regs to be restored by child"
992 		    " for %s@%s\n", ddi_node_name(child),
993 		    ddi_get_name_addr(child));
994 
995 		result = DDI_SUCCESS;
996 		goto cleanup;
997 	}
998 
999 	PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
1000 	    "INITCHILD: config regs setup for %s@%s\n",
1001 	    ddi_node_name(child), ddi_get_name_addr(child));
1002 
1003 	pcie_init_dom(child);
1004 
1005 	if (pcie_initchild(child) != DDI_SUCCESS) {
1006 		result = DDI_FAILURE;
1007 		pcie_fini_dom(child);
1008 		goto cleanup;
1009 	}
1010 
1011 #ifdef PX_PLX
1012 	if (pcieb_init_plx_workarounds(pcieb, child) == DDI_FAILURE) {
1013 		result = DDI_FAILURE;
1014 		pcie_fini_dom(child);
1015 		goto cleanup;
1016 	}
1017 #endif /* PX_PLX */
1018 
1019 	result = DDI_SUCCESS;
1020 cleanup:
1021 	pcie_pm_release(pcieb->pcieb_dip);
1022 done:
1023 	return (result);
1024 }
1025 
/*
 * Counterpart to pcieb_initchild(): run the common PCIe and platform
 * specific child teardown, clear the child's unit address, and strip its
 * minor nodes and properties.
 */
static void
pcieb_uninitchild(dev_info_t *dip)
{

	pcie_uninitchild(dip);

	pcieb_plat_uninitchild(dip);

	/* Drop the unit address assigned when the child was named. */
	ddi_set_name_addr(dip, NULL);

	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	ddi_remove_minor_node(dip, NULL);

	ddi_prop_remove_all(dip);
}
1043 
1044 static boolean_t
1045 pcieb_is_pcie_device_type(dev_info_t *dip)
1046 {
1047 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
1048 
1049 	if (PCIE_IS_SW(bus_p) || PCIE_IS_RP(bus_p) || PCIE_IS_PCI2PCIE(bus_p))
1050 		return (B_TRUE);
1051 
1052 	return (B_FALSE);
1053 }
1054 
1055 static int
1056 pcieb_intr_attach(pcieb_devstate_t *pcieb)
1057 {
1058 	int			intr_types;
1059 	dev_info_t		*dip = pcieb->pcieb_dip;
1060 
1061 	/* Allow platform specific code to do any initialization first */
1062 	pcieb_plat_intr_attach(pcieb);
1063 
1064 	/*
1065 	 * Initialize interrupt handlers.
1066 	 * If both MSI and FIXED are supported, try to attach MSI first.
1067 	 * If MSI fails for any reason, then try FIXED, but only allow one
1068 	 * type to be attached.
1069 	 */
1070 	if (ddi_intr_get_supported_types(dip, &intr_types) != DDI_SUCCESS) {
1071 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_supported_types"
1072 		    " failed\n");
1073 		goto FAIL;
1074 	}
1075 
1076 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
1077 	    (pcieb_msi_supported(dip) == DDI_SUCCESS)) {
1078 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_MSI) == DDI_SUCCESS)
1079 			intr_types = DDI_INTR_TYPE_MSI;
1080 		else {
1081 			PCIEB_DEBUG(DBG_ATTACH, dip, "Unable to attach MSI"
1082 			    " handler\n");
1083 		}
1084 	}
1085 
1086 	if (intr_types != DDI_INTR_TYPE_MSI) {
1087 		/*
1088 		 * MSIs are not supported or MSI initialization failed. For Root
1089 		 * Ports mark this so error handling might try to fallback to
1090 		 * some other mechanism if available (machinecheck etc.).
1091 		 */
1092 		if (PCIE_IS_RP(PCIE_DIP2UPBUS(dip)))
1093 			pcieb->pcieb_no_aer_msi = B_TRUE;
1094 	}
1095 
1096 	if (intr_types & DDI_INTR_TYPE_FIXED) {
1097 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_FIXED) !=
1098 		    DDI_SUCCESS) {
1099 			PCIEB_DEBUG(DBG_ATTACH, dip,
1100 			    "Unable to attach INTx handler\n");
1101 			goto FAIL;
1102 		}
1103 	}
1104 	return (DDI_SUCCESS);
1105 
1106 FAIL:
1107 	return (DDI_FAILURE);
1108 }
1109 
1110 /*
1111  * This function initializes internally generated interrupts only.
1112  * It does not affect any interrupts generated by downstream devices
1113  * or the forwarding of them.
1114  *
1115  * Enable Device Specific Interrupts or Hotplug features here.
1116  * Enabling features may change how many interrupts are requested
1117  * by the device.  If features are not enabled first, the
1118  * device might not ask for any interrupts.
1119  */
1120 static int
1121 pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
1122 {
1123 	dev_info_t	*dip = pcieb->pcieb_dip;
1124 	int		nintrs, request, count, x;
1125 	int		intr_cap = 0;
1126 	int		inum = 0;
1127 	int		ret;
1128 	pcie_bus_t	*bus_p = PCIE_DIP2UPBUS(dip);
1129 	uint16_t	vendorid = bus_p->bus_dev_ven_id & 0xFFFF;
1130 	boolean_t	is_hp = B_FALSE;
1131 	boolean_t	is_pme = B_FALSE;
1132 	boolean_t	is_lbw = B_FALSE;
1133 
1134 	PCIEB_DEBUG(DBG_ATTACH, dip, "pcieb_intr_init: Attaching %s handler\n",
1135 	    (intr_type == DDI_INTR_TYPE_MSI) ? "MSI" : "INTx");
1136 
1137 	request = 0;
1138 	if (PCIE_IS_HOTPLUG_ENABLED(dip)) {
1139 		is_hp = B_TRUE;
1140 	}
1141 
1142 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p) &&
1143 	    (vendorid == NVIDIA_VENDOR_ID)) {
1144 		is_pme = B_TRUE;
1145 	}
1146 
1147 	if (intr_type == DDI_INTR_TYPE_MSI && pcie_link_bw_supported(dip)) {
1148 		is_lbw = B_TRUE;
1149 	}
1150 
1151 	/*
1152 	 * The hot-plug, link bandwidth, and power management events all are
1153 	 * based on the PCI Express capability. Therefore, they all share their
1154 	 * own interrupt.
1155 	 */
1156 	if (is_hp || is_pme || is_lbw) {
1157 		request++;
1158 	}
1159 
1160 	/*
1161 	 * If this device is a root port, which means it can have MSI interrupts
1162 	 * enabled for AERs, then we need to request one.
1163 	 */
1164 	if (intr_type == DDI_INTR_TYPE_MSI) {
1165 		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
1166 			request++;
1167 		}
1168 	}
1169 
1170 	if (request == 0)
1171 		return (DDI_SUCCESS);
1172 
1173 	/*
1174 	 * Get number of supported interrupts.
1175 	 *
1176 	 * Several Bridges/Switches will not have this property set, resulting
1177 	 * in a FAILURE, if the device is not configured in a way that
1178 	 * interrupts are needed. (eg. hotplugging)
1179 	 */
1180 	ret = ddi_intr_get_nintrs(dip, intr_type, &nintrs);
1181 	if ((ret != DDI_SUCCESS) || (nintrs == 0)) {
1182 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_nintrs ret:%d"
1183 		    " req:%d\n", ret, nintrs);
1184 		return (DDI_FAILURE);
1185 	}
1186 
1187 	PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0x%x: ddi_intr_get_nintrs: nintrs %d",
1188 	    " request %d\n", bus_p->bus_bdf, nintrs, request);
1189 
1190 	if (request > nintrs)
1191 		request = nintrs;
1192 
1193 	/* Allocate an array of interrupt handlers */
1194 	pcieb->pcieb_htable_size = sizeof (ddi_intr_handle_t) * request;
1195 	pcieb->pcieb_htable = kmem_zalloc(pcieb->pcieb_htable_size,
1196 	    KM_SLEEP);
1197 	pcieb->pcieb_init_flags |= PCIEB_INIT_HTABLE;
1198 
1199 	ret = ddi_intr_alloc(dip, pcieb->pcieb_htable, intr_type, inum,
1200 	    request, &count, DDI_INTR_ALLOC_NORMAL);
1201 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1202 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_alloc() ret: %d ask: %d"
1203 		    " actual: %d\n", ret, request, count);
1204 		goto FAIL;
1205 	}
1206 	pcieb->pcieb_init_flags |= PCIEB_INIT_ALLOC;
1207 
1208 	/* Save the actual number of interrupts allocated */
1209 	pcieb->pcieb_intr_count = count;
1210 	if (count < request) {
1211 		PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0%x: Requested Intr: %d"
1212 		    " Received: %d\n", bus_p->bus_bdf, request, count);
1213 	}
1214 
1215 	/*
1216 	 * NVidia (MCP55 and other) chipsets have a errata that if the number
1217 	 * of requested MSI intrs is not allocated we have to fall back to INTx.
1218 	 */
1219 	if (intr_type == DDI_INTR_TYPE_MSI) {
1220 		if (PCIE_IS_RP(bus_p) && (vendorid == NVIDIA_VENDOR_ID)) {
1221 			if (request != count)
1222 				goto FAIL;
1223 		}
1224 	}
1225 
1226 	/* Get interrupt priority */
1227 	ret = ddi_intr_get_pri(pcieb->pcieb_htable[0],
1228 	    &pcieb->pcieb_intr_priority);
1229 	if (ret != DDI_SUCCESS) {
1230 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_pri() ret: %d\n",
1231 		    ret);
1232 		goto FAIL;
1233 	}
1234 
1235 	if (pcieb->pcieb_intr_priority >= LOCK_LEVEL) {
1236 		pcieb->pcieb_intr_priority = LOCK_LEVEL - 1;
1237 		ret = ddi_intr_set_pri(pcieb->pcieb_htable[0],
1238 		    pcieb->pcieb_intr_priority);
1239 		if (ret != DDI_SUCCESS) {
1240 			PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_set_pri() ret:"
1241 			" %d\n", ret);
1242 
1243 			goto FAIL;
1244 		}
1245 	}
1246 
1247 	mutex_init(&pcieb->pcieb_intr_mutex, NULL, MUTEX_DRIVER, NULL);
1248 
1249 	pcieb->pcieb_init_flags |= PCIEB_INIT_MUTEX;
1250 
1251 	for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1252 		ret = ddi_intr_add_handler(pcieb->pcieb_htable[count],
1253 		    pcieb_intr_handler, (caddr_t)pcieb,
1254 		    (caddr_t)(uintptr_t)(inum + count));
1255 
1256 		if (ret != DDI_SUCCESS) {
1257 			PCIEB_DEBUG(DBG_ATTACH, dip, "Cannot add "
1258 			    "interrupt(%d)\n", ret);
1259 			break;
1260 		}
1261 	}
1262 
1263 	/* If unsucessful, remove the added handlers */
1264 	if (ret != DDI_SUCCESS) {
1265 		for (x = 0; x < count; x++) {
1266 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1267 		}
1268 		goto FAIL;
1269 	}
1270 
1271 	pcieb->pcieb_init_flags |= PCIEB_INIT_HANDLER;
1272 
1273 	(void) ddi_intr_get_cap(pcieb->pcieb_htable[0], &intr_cap);
1274 
1275 	/*
1276 	 * Get this intr lock because we are not quite ready to handle
1277 	 * interrupts immediately after enabling it. The MSI multi register
1278 	 * gets programmed in ddi_intr_enable after which we need to get the
1279 	 * MSI offsets for Hotplug/AER.
1280 	 */
1281 	mutex_enter(&pcieb->pcieb_intr_mutex);
1282 
1283 	if (intr_cap & DDI_INTR_FLAG_BLOCK) {
1284 		(void) ddi_intr_block_enable(pcieb->pcieb_htable,
1285 		    pcieb->pcieb_intr_count);
1286 		pcieb->pcieb_init_flags |= PCIEB_INIT_BLOCK;
1287 	} else {
1288 		for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1289 			(void) ddi_intr_enable(pcieb->pcieb_htable[count]);
1290 		}
1291 	}
1292 	pcieb->pcieb_init_flags |= PCIEB_INIT_ENABLE;
1293 
1294 	/* Save the interrupt type */
1295 	pcieb->pcieb_intr_type = intr_type;
1296 
1297 	/* Get the MSI offset for hotplug/PME from the PCIe cap reg */
1298 	if (intr_type == DDI_INTR_TYPE_MSI) {
1299 		uint16_t pcie_msi_off;
1300 		pcie_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0,
1301 		    bus_p->bus_pcie_off, PCIE_PCIECAP) &
1302 		    PCIE_PCIECAP_INT_MSG_NUM;
1303 
1304 		if (pcie_msi_off >= count) {
1305 			PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %u in PCIe "
1306 			    "cap > max allocated %d\n", pcie_msi_off, count);
1307 			mutex_exit(&pcieb->pcieb_intr_mutex);
1308 			goto FAIL;
1309 		}
1310 
1311 		if (is_hp) {
1312 			pcieb->pcieb_isr_tab[pcie_msi_off] |= PCIEB_INTR_SRC_HP;
1313 		}
1314 
1315 		if (is_pme) {
1316 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1317 			    PCIEB_INTR_SRC_PME;
1318 		}
1319 
1320 		if (is_lbw) {
1321 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1322 			    PCIEB_INTR_SRC_LBW;
1323 		}
1324 	} else {
1325 		/* INTx handles only Hotplug interrupts */
1326 		if (is_hp)
1327 			pcieb->pcieb_isr_tab[0] |= PCIEB_INTR_SRC_HP;
1328 	}
1329 
1330 
1331 	/*
1332 	 * Get the MSI offset for errors from the AER Root Error status
1333 	 * register.
1334 	 */
1335 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p)) {
1336 		if (PCIE_HAS_AER(bus_p)) {
1337 			int aer_msi_off;
1338 			aer_msi_off = (PCI_XCAP_GET32(bus_p->bus_cfg_hdl, 0,
1339 			    bus_p->bus_aer_off, PCIE_AER_RE_STS) >>
1340 			    PCIE_AER_RE_STS_MSG_NUM_SHIFT) &
1341 			    PCIE_AER_RE_STS_MSG_NUM_MASK;
1342 
1343 			if (aer_msi_off >= count) {
1344 				PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %d in"
1345 				    " AER cap > max allocated %d\n",
1346 				    aer_msi_off, count);
1347 				mutex_exit(&pcieb->pcieb_intr_mutex);
1348 				goto FAIL;
1349 			}
1350 			pcieb->pcieb_isr_tab[aer_msi_off] |= PCIEB_INTR_SRC_AER;
1351 		} else {
1352 			/*
1353 			 * This RP does not have AER. Fallback to the
1354 			 * SERR+Machinecheck approach if available.
1355 			 */
1356 			pcieb->pcieb_no_aer_msi = B_TRUE;
1357 		}
1358 	}
1359 
1360 	mutex_exit(&pcieb->pcieb_intr_mutex);
1361 	return (DDI_SUCCESS);
1362 
1363 FAIL:
1364 	pcieb_intr_fini(pcieb);
1365 	return (DDI_FAILURE);
1366 }
1367 
1368 static void
1369 pcieb_intr_fini(pcieb_devstate_t *pcieb)
1370 {
1371 	int x;
1372 	int count = pcieb->pcieb_intr_count;
1373 	int flags = pcieb->pcieb_init_flags;
1374 
1375 	if ((flags & PCIEB_INIT_ENABLE) &&
1376 	    (flags & PCIEB_INIT_BLOCK)) {
1377 		(void) ddi_intr_block_disable(pcieb->pcieb_htable, count);
1378 		flags &= ~(PCIEB_INIT_ENABLE |
1379 		    PCIEB_INIT_BLOCK);
1380 	}
1381 
1382 	if (flags & PCIEB_INIT_MUTEX)
1383 		mutex_destroy(&pcieb->pcieb_intr_mutex);
1384 
1385 	for (x = 0; x < count; x++) {
1386 		if (flags & PCIEB_INIT_ENABLE)
1387 			(void) ddi_intr_disable(pcieb->pcieb_htable[x]);
1388 
1389 		if (flags & PCIEB_INIT_HANDLER)
1390 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1391 
1392 		if (flags & PCIEB_INIT_ALLOC)
1393 			(void) ddi_intr_free(pcieb->pcieb_htable[x]);
1394 	}
1395 
1396 	flags &= ~(PCIEB_INIT_ENABLE | PCIEB_INIT_HANDLER | PCIEB_INIT_ALLOC |
1397 	    PCIEB_INIT_MUTEX);
1398 
1399 	if (flags & PCIEB_INIT_HTABLE)
1400 		kmem_free(pcieb->pcieb_htable, pcieb->pcieb_htable_size);
1401 
1402 	flags &= ~PCIEB_INIT_HTABLE;
1403 
1404 	pcieb->pcieb_init_flags &= flags;
1405 }
1406 
1407 /*
1408  * Checks if this device needs MSIs enabled or not.
1409  */
1410 /*ARGSUSED*/
1411 static int
1412 pcieb_msi_supported(dev_info_t *dip)
1413 {
1414 	return ((pcieb_enable_msi && pcieb_plat_msi_supported(dip)) ?
1415 	    DDI_SUCCESS: DDI_FAILURE);
1416 }
1417 
1418 /*ARGSUSED*/
1419 static int
1420 pcieb_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
1421     ddi_iblock_cookie_t *ibc)
1422 {
1423 	pcieb_devstate_t  *pcieb = ddi_get_soft_state(pcieb_state,
1424 	    ddi_get_instance(dip));
1425 
1426 	ASSERT(ibc != NULL);
1427 	*ibc = pcieb->pcieb_fm_ibc;
1428 
1429 	return (DEVI(dip)->devi_fmhdl->fh_cap | DDI_FM_ACCCHK_CAPABLE |
1430 	    DDI_FM_DMACHK_CAPABLE);
1431 }
1432 
1433 static int
1434 pcieb_fm_init(pcieb_devstate_t *pcieb_p)
1435 {
1436 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1437 	int		fm_cap = DDI_FM_EREPORT_CAPABLE;
1438 
1439 	/*
1440 	 * Request our capability level and get our parents capability
1441 	 * and ibc.
1442 	 */
1443 	ddi_fm_init(dip, &fm_cap, &pcieb_p->pcieb_fm_ibc);
1444 
1445 	return (DDI_SUCCESS);
1446 }
1447 
1448 /*
1449  * Breakdown our FMA resources
1450  */
1451 static void
1452 pcieb_fm_fini(pcieb_devstate_t *pcieb_p)
1453 {
1454 	/*
1455 	 * Clean up allocated fm structures
1456 	 */
1457 	ddi_fm_fini(pcieb_p->pcieb_dip);
1458 }
1459 
1460 static int
1461 pcieb_open(dev_t *devp, int flags, int otyp, cred_t *credp)
1462 {
1463 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(*devp));
1464 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1465 	int	rv;
1466 
1467 	if (pcieb == NULL)
1468 		return (ENXIO);
1469 
1470 	mutex_enter(&pcieb->pcieb_mutex);
1471 	rv = pcie_open(pcieb->pcieb_dip, devp, flags, otyp, credp);
1472 	mutex_exit(&pcieb->pcieb_mutex);
1473 
1474 	return (rv);
1475 }
1476 
1477 static int
1478 pcieb_close(dev_t dev, int flags, int otyp, cred_t *credp)
1479 {
1480 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1481 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1482 	int	rv;
1483 
1484 	if (pcieb == NULL)
1485 		return (ENXIO);
1486 
1487 	mutex_enter(&pcieb->pcieb_mutex);
1488 	rv = pcie_close(pcieb->pcieb_dip, dev, flags, otyp, credp);
1489 	mutex_exit(&pcieb->pcieb_mutex);
1490 
1491 	return (rv);
1492 }
1493 
1494 static int
1495 pcieb_ioctl_retrain(pcieb_devstate_t *pcieb, cred_t *credp)
1496 {
1497 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1498 
1499 	if (drv_priv(credp) != 0) {
1500 		return (EPERM);
1501 	}
1502 
1503 	if (!PCIE_IS_PCIE(bus_p)) {
1504 		return (ENOTSUP);
1505 	}
1506 
1507 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1508 		return (ENOTSUP);
1509 	}
1510 
1511 	return (pcie_link_retrain(pcieb->pcieb_dip));
1512 }
1513 
1514 static int
1515 pcieb_ioctl_get_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1516     cred_t *credp)
1517 {
1518 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1519 	pcieb_ioctl_target_speed_t	pits;
1520 
1521 	if (drv_priv(credp) != 0) {
1522 		return (EPERM);
1523 	}
1524 
1525 	if (!PCIE_IS_PCIE(bus_p)) {
1526 		return (ENOTSUP);
1527 	}
1528 
1529 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1530 		return (ENOTSUP);
1531 	}
1532 
1533 	pits.pits_flags = 0;
1534 	pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1535 
1536 	mutex_enter(&bus_p->bus_speed_mutex);
1537 	if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
1538 		pits.pits_flags |= PCIEB_FLAGS_ADMIN_SET;
1539 	}
1540 	switch (bus_p->bus_target_speed) {
1541 	case PCIE_LINK_SPEED_2_5:
1542 		pits.pits_speed = PCIEB_LINK_SPEED_GEN1;
1543 		break;
1544 	case PCIE_LINK_SPEED_5:
1545 		pits.pits_speed = PCIEB_LINK_SPEED_GEN2;
1546 		break;
1547 	case PCIE_LINK_SPEED_8:
1548 		pits.pits_speed = PCIEB_LINK_SPEED_GEN3;
1549 		break;
1550 	case PCIE_LINK_SPEED_16:
1551 		pits.pits_speed = PCIEB_LINK_SPEED_GEN4;
1552 		break;
1553 	case PCIE_LINK_SPEED_32:
1554 		pits.pits_speed = PCIEB_LINK_SPEED_GEN5;
1555 		break;
1556 	case PCIE_LINK_SPEED_64:
1557 		pits.pits_speed = PCIEB_LINK_SPEED_GEN6;
1558 		break;
1559 	default:
1560 		pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1561 		break;
1562 	}
1563 	mutex_exit(&bus_p->bus_speed_mutex);
1564 
1565 	if (ddi_copyout(&pits, (void *)arg, sizeof (pits),
1566 	    mode & FKIOCTL) != 0) {
1567 		return (EFAULT);
1568 	}
1569 
1570 	return (0);
1571 }
1572 
1573 static int
1574 pcieb_ioctl_set_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1575     cred_t *credp)
1576 {
1577 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1578 	pcieb_ioctl_target_speed_t	pits;
1579 	pcie_link_speed_t		speed;
1580 
1581 	if (drv_priv(credp) != 0) {
1582 		return (EPERM);
1583 	}
1584 
1585 	if (!PCIE_IS_PCIE(bus_p)) {
1586 		return (ENOTSUP);
1587 	}
1588 
1589 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1590 		return (ENOTSUP);
1591 	}
1592 
1593 	if (ddi_copyin((void *)arg, &pits, sizeof (pits),
1594 	    mode & FKIOCTL) != 0) {
1595 		return (EFAULT);
1596 	}
1597 
1598 	if (pits.pits_flags != 0) {
1599 		return (EINVAL);
1600 	}
1601 
1602 	switch (pits.pits_speed) {
1603 	case PCIEB_LINK_SPEED_GEN1:
1604 		speed = PCIE_LINK_SPEED_2_5;
1605 		break;
1606 	case PCIEB_LINK_SPEED_GEN2:
1607 		speed = PCIE_LINK_SPEED_5;
1608 		break;
1609 	case PCIEB_LINK_SPEED_GEN3:
1610 		speed = PCIE_LINK_SPEED_8;
1611 		break;
1612 	case PCIEB_LINK_SPEED_GEN4:
1613 		speed = PCIE_LINK_SPEED_16;
1614 		break;
1615 	case PCIEB_LINK_SPEED_GEN5:
1616 		speed = PCIE_LINK_SPEED_32;
1617 		break;
1618 	case PCIEB_LINK_SPEED_GEN6:
1619 		speed = PCIE_LINK_SPEED_64;
1620 		break;
1621 	default:
1622 		return (EINVAL);
1623 	}
1624 
1625 	return (pcie_link_set_target(pcieb->pcieb_dip, speed));
1626 }
1627 
1628 static int
1629 pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1630     int *rvalp)
1631 {
1632 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1633 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1634 	int		rv;
1635 
1636 	if (pcieb == NULL)
1637 		return (ENXIO);
1638 
1639 	/*
1640 	 * Check if this is one of the commands that the bridge driver natively
1641 	 * understands. There are only a handful of such private ioctls defined
1642 	 * in pcieb_ioctl.h. Otherwise, this ioctl should be handled by the
1643 	 * general pcie driver.
1644 	 */
1645 	switch (cmd) {
1646 	case PCIEB_IOCTL_RETRAIN:
1647 		rv = pcieb_ioctl_retrain(pcieb, credp);
1648 		break;
1649 	case PCIEB_IOCTL_GET_TARGET_SPEED:
1650 		rv = pcieb_ioctl_get_speed(pcieb, arg, mode, credp);
1651 		break;
1652 	case PCIEB_IOCTL_SET_TARGET_SPEED:
1653 		rv = pcieb_ioctl_set_speed(pcieb, arg, mode, credp);
1654 		break;
1655 	default:
1656 		/* To handle devctl and hotplug related ioctls */
1657 		rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp,
1658 		    rvalp);
1659 		break;
1660 	}
1661 
1662 	return (rv);
1663 }
1664 
1665 /*
1666  * Common interrupt handler for hotplug, PME and errors.
1667  */
1668 static uint_t
1669 pcieb_intr_handler(caddr_t arg1, caddr_t arg2)
1670 {
1671 	pcieb_devstate_t *pcieb_p = (pcieb_devstate_t *)arg1;
1672 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1673 	ddi_fm_error_t	derr;
1674 	int		sts = 0;
1675 	int		ret = DDI_INTR_UNCLAIMED;
1676 	int		isrc;
1677 
1678 	if (!(pcieb_p->pcieb_init_flags & PCIEB_INIT_ENABLE))
1679 		goto FAIL;
1680 
1681 	mutex_enter(&pcieb_p->pcieb_intr_mutex);
1682 	isrc = pcieb_p->pcieb_isr_tab[(int)(uintptr_t)arg2];
1683 	mutex_exit(&pcieb_p->pcieb_intr_mutex);
1684 
1685 	PCIEB_DEBUG(DBG_INTR, dip, "Received intr number %d\n",
1686 	    (int)(uintptr_t)arg2);
1687 
1688 	if (isrc == PCIEB_INTR_SRC_UNKNOWN)
1689 		goto FAIL;
1690 
1691 	if (isrc & (PCIEB_INTR_SRC_HP | PCIEB_INTR_SRC_LBW))
1692 		ret = pcie_intr(dip);
1693 
1694 	if (isrc & PCIEB_INTR_SRC_PME)
1695 		ret = DDI_INTR_CLAIMED;
1696 
1697 	/* AER Error */
1698 	if (isrc & PCIEB_INTR_SRC_AER) {
1699 		/*
1700 		 * AERs can interrupt on this vector, so if error reporting is
1701 		 * possible we need to scan the fabric to determine whether to
1702 		 * claim it.  This process will also generate ereports if the
1703 		 * bridge has error data for us.  Checking for EREPORT_CAPABLE
1704 		 * is perhaps a bit of a formality here but if it's not set we
1705 		 * aren't going to be able to do much that's useful with the AER
1706 		 * data; we initialise sts to 0 rather than PF_ERR_NO_ERROR so
1707 		 * that we'll claim this interrupt in that case, since we aren't
1708 		 * going to do the scan.  It may be more correct to check the
1709 		 * root port status ourselves in that case, but we aren't
1710 		 * terribly worried about the case where we don't have FMA
1711 		 * capabilities.
1712 		 */
1713 		bzero(&derr, sizeof (ddi_fm_error_t));
1714 		derr.fme_version = DDI_FME_VERSION;
1715 		mutex_enter(&pcieb_p->pcieb_peek_poke_mutex);
1716 		mutex_enter(&pcieb_p->pcieb_err_mutex);
1717 
1718 		pf_eh_enter(PCIE_DIP2BUS(dip));
1719 		PCIE_ROOT_EH_SRC(PCIE_DIP2PFD(dip))->intr_type =
1720 		    PF_INTR_TYPE_AER;
1721 
1722 		if ((DEVI(dip)->devi_fmhdl->fh_cap) & DDI_FM_EREPORT_CAPABLE)
1723 			sts = pf_scan_fabric(dip, &derr, NULL);
1724 		pf_eh_exit(PCIE_DIP2BUS(dip));
1725 
1726 		mutex_exit(&pcieb_p->pcieb_err_mutex);
1727 		mutex_exit(&pcieb_p->pcieb_peek_poke_mutex);
1728 		if ((pcieb_die & sts) != 0) {
1729 			fm_panic("%s-%d: PCI(-X) Express Fatal Error. (0x%x)",
1730 			    ddi_driver_name(dip), ddi_get_instance(dip), sts);
1731 		}
1732 
1733 		if ((sts & ~PF_ERR_NO_ERROR) != 0)
1734 			ret = DDI_INTR_CLAIMED;
1735 	}
1736 FAIL:
1737 	return (ret);
1738 }
1739 
/*
 * Some PCI-X to PCI-E bridges do not support full 64-bit addressing on the
 * PCI-X side of the bridge.  We build a special version of this driver for
 * those bridges, which uses PCIEB_ADDR_LIMIT_LO and/or PCIEB_ADDR_LIMIT_HI
 * to define the range of values which the chip can handle.  The code below
 * then clamps the DMA address range supplied by the driver, preventing the
 * PCI-E nexus driver from allocating any memory the bridge can't deal
 * with.
 *
 * Note that in PCIEB_BCM builds the clamp is written back into the
 * caller's attr_p before the request is passed on to ddi_dma_allochdl().
 * Returns whatever ddi_dma_allochdl() returns.
 */
static int
pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
    ddi_dma_handle_t *handlep)
{
	int		ret;
#ifdef	PCIEB_BCM
	uint64_t	lim;

	/*
	 * If the leaf device's limits are outside of what the Broadcom
	 * bridge can handle, we need to clip the values passed up the chain.
	 */
	lim = attr_p->dma_attr_addr_lo;
	attr_p->dma_attr_addr_lo = MAX(lim, PCIEB_ADDR_LIMIT_LO);

	lim = attr_p->dma_attr_addr_hi;
	attr_p->dma_attr_addr_hi = MIN(lim, PCIEB_ADDR_LIMIT_HI);

#endif	/* PCIEB_BCM */

	/*
	 * This is a software workaround to fix the Broadcom 5714/5715 PCIe-PCI
	 * bridge prefetch bug. Intercept the DMA alloc handle request and set
	 * PX_DMAI_FLAGS_MAP_BUFZONE flag in the handle. If this flag is set,
	 * the px nexus driver will allocate an extra page & make it valid one,
	 * for any DVMA request that comes from any of the Broadcom bridge child
	 * devices.
	 */
	if ((ret = ddi_dma_allochdl(dip, rdip, attr_p, waitfp, arg,
	    handlep)) == DDI_SUCCESS) {
		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*handlep;
#ifdef	PCIEB_BCM
		mp->dmai_inuse |= PX_DMAI_FLAGS_MAP_BUFZONE;
#endif	/* PCIEB_BCM */
		/*
		 * For a given rdip, record the bdf value of pcieb's
		 * immediate child or secondary bus-id of the PCIe2PCI
		 * bridge in the handle.  The value is stored in
		 * mp->dmai_minxfer.
		 * NOTE(review): this comment used to name mp->dmai_bdf;
		 * presumably dmai_minxfer is being reused to carry the
		 * bdf -- confirm against the ddi_dma_impl_t consumers.
		 */
		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
	}

	return (ret);
}
1794 
/*
 * FDVMA feature is not supported for any child device of Broadcom 5714/5715
 * PCIe-PCI bridge due to prefetch bug. Return failure immediately, so that
 * these drivers will switch to regular DVMA path.
 *
 * In all other cases the request is passed to ddi_dma_mctl() and its
 * return value is handed back; a successful DDI_DMA_RESERVE additionally
 * has the bdf recorded in the returned handle.
 */
/*ARGSUSED*/
static int
pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
    enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
    uint_t cache_flags)
{
	int	ret;

#ifdef	PCIEB_BCM
	if (cmd == DDI_DMA_RESERVE)
		return (DDI_FAILURE);
#endif	/* PCIEB_BCM */

	if (((ret = ddi_dma_mctl(dip, rdip, handle, cmd, offp, lenp, objp,
	    cache_flags)) == DDI_SUCCESS) && (cmd == DDI_DMA_RESERVE)) {
		/* On a successful reserve, *objp is treated as the handle. */
		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*objp;

		/*
		 * For a given rdip, record the bdf value of pcieb's
		 * immediate child or secondary bus-id of the PCIe2PCI
		 * bridge in the handle.  The value is stored in
		 * mp->dmai_minxfer.
		 * NOTE(review): this comment used to name mp->dmai_bdf;
		 * presumably dmai_minxfer is being reused to carry the
		 * bdf -- confirm against the ddi_dma_impl_t consumers.
		 */
		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
	}

	return (ret);
}
1827 
1828 static int
1829 pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1830     ddi_intr_handle_impl_t *hdlp, void *result)
1831 {
1832 	return (pcieb_plat_intr_ops(dip, rdip, intr_op, hdlp, result));
1833 
1834 }
1835 
1836 /*
1837  * Power management related initialization specific to pcieb.
1838  * Called by pcieb_attach()
1839  */
1840 static int
1841 pcieb_pwr_setup(dev_info_t *dip)
1842 {
1843 	char *comp_array[5];
1844 	int i;
1845 	ddi_acc_handle_t conf_hdl;
1846 	uint16_t pmcap, cap_ptr;
1847 	pcie_pwr_t *pwr_p;
1848 
1849 	/* Some platforms/devices may choose to disable PM */
1850 	if (pcieb_plat_pwr_disable(dip)) {
1851 		(void) pcieb_pwr_disable(dip);
1852 		return (DDI_SUCCESS);
1853 	}
1854 
1855 	ASSERT(PCIE_PMINFO(dip));
1856 	pwr_p = PCIE_NEXUS_PMINFO(dip);
1857 	ASSERT(pwr_p);
1858 
1859 	/* Code taken from pci_pci driver */
1860 	if (pci_config_setup(dip, &pwr_p->pwr_conf_hdl) != DDI_SUCCESS) {
1861 		PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: pci_config_setup "
1862 		    "failed\n");
1863 		return (DDI_FAILURE);
1864 	}
1865 	conf_hdl = pwr_p->pwr_conf_hdl;
1866 
1867 	/*
1868 	 * Walk the capabilities searching for a PM entry.
1869 	 */
1870 	if ((PCI_CAP_LOCATE(conf_hdl, PCI_CAP_ID_PM, &cap_ptr)) ==
1871 	    DDI_FAILURE) {
1872 		PCIEB_DEBUG(DBG_PWR, dip, "switch/bridge does not support PM. "
1873 		    " PCI PM data structure not found in config header\n");
1874 		pci_config_teardown(&conf_hdl);
1875 		return (DDI_SUCCESS);
1876 	}
1877 	/*
1878 	 * Save offset to pmcsr for future references.
1879 	 */
1880 	pwr_p->pwr_pmcsr_offset = cap_ptr + PCI_PMCSR;
1881 	pmcap = PCI_CAP_GET16(conf_hdl, 0, cap_ptr, PCI_PMCAP);
1882 	if (pmcap & PCI_PMCAP_D1) {
1883 		PCIEB_DEBUG(DBG_PWR, dip, "D1 state supported\n");
1884 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D1;
1885 	}
1886 	if (pmcap & PCI_PMCAP_D2) {
1887 		PCIEB_DEBUG(DBG_PWR, dip, "D2 state supported\n");
1888 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D2;
1889 	}
1890 
1891 	i = 0;
1892 	comp_array[i++] = "NAME=PCIe switch/bridge PM";
1893 	comp_array[i++] = "0=Power Off (D3)";
1894 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D2)
1895 		comp_array[i++] = "1=D2";
1896 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D1)
1897 		comp_array[i++] = "2=D1";
1898 	comp_array[i++] = "3=Full Power D0";
1899 
1900 	/*
1901 	 * Create pm-components property, if it does not exist already.
1902 	 */
1903 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, dip,
1904 	    "pm-components", comp_array, i) != DDI_PROP_SUCCESS) {
1905 		PCIEB_DEBUG(DBG_PWR, dip, "could not create pm-components "
1906 		    " prop\n");
1907 		pci_config_teardown(&conf_hdl);
1908 		return (DDI_FAILURE);
1909 	}
1910 	return (pcieb_pwr_init_and_raise(dip, pwr_p));
1911 }
1912 
1913 /*
1914  * undo whatever is done in pcieb_pwr_setup. called by pcieb_detach()
1915  */
1916 static void
1917 pcieb_pwr_teardown(dev_info_t *dip)
1918 {
1919 	pcie_pwr_t	*pwr_p;
1920 
1921 	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
1922 		return;
1923 
1924 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "pm-components");
1925 	if (pwr_p->pwr_conf_hdl)
1926 		pci_config_teardown(&pwr_p->pwr_conf_hdl);
1927 }
1928 
1929 /*
1930  * Initializes the power level and raise the power to D0, if it is
1931  * not at D0.
1932  */
1933 static int
1934 pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p)
1935 {
1936 	uint16_t pmcsr;
1937 	int ret = DDI_SUCCESS;
1938 
1939 	/*
1940 	 * Intialize our power level from PMCSR. The common code initializes
1941 	 * this to UNKNOWN. There is no guarantee that we will be at full
1942 	 * power at attach. If we are not at D0, raise the power.
1943 	 */
1944 	pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
1945 	pmcsr &= PCI_PMCSR_STATE_MASK;
1946 	switch (pmcsr) {
1947 	case PCI_PMCSR_D0:
1948 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1949 		break;
1950 
1951 	case PCI_PMCSR_D1:
1952 		pwr_p->pwr_func_lvl = PM_LEVEL_D1;
1953 		break;
1954 
1955 	case PCI_PMCSR_D2:
1956 		pwr_p->pwr_func_lvl = PM_LEVEL_D2;
1957 		break;
1958 
1959 	case PCI_PMCSR_D3HOT:
1960 		pwr_p->pwr_func_lvl = PM_LEVEL_D3;
1961 		break;
1962 
1963 	default:
1964 		break;
1965 	}
1966 
1967 	/* Raise the power to D0. */
1968 	if (pwr_p->pwr_func_lvl != PM_LEVEL_D0 &&
1969 	    ((ret = pm_raise_power(dip, 0, PM_LEVEL_D0)) != DDI_SUCCESS)) {
1970 		/*
1971 		 * Read PMCSR again. If it is at D0, ignore the return
1972 		 * value from pm_raise_power.
1973 		 */
1974 		pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl,
1975 		    pwr_p->pwr_pmcsr_offset);
1976 		if ((pmcsr & PCI_PMCSR_STATE_MASK) == PCI_PMCSR_D0)
1977 			ret = DDI_SUCCESS;
1978 		else {
1979 			PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: could not "
1980 			    "raise power to D0 \n");
1981 		}
1982 	}
1983 	if (ret == DDI_SUCCESS)
1984 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1985 	return (ret);
1986 }
1987 
1988 /*
1989  * Disable PM for x86 and PLX 8532 switch.
1990  * For PLX Transitioning one port on this switch to low power causes links
1991  * on other ports on the same station to die. Due to PLX erratum #34, we
1992  * can't allow the downstream device go to non-D0 state.
1993  */
1994 static int
1995 pcieb_pwr_disable(dev_info_t *dip)
1996 {
1997 	pcie_pwr_t *pwr_p;
1998 
1999 	ASSERT(PCIE_PMINFO(dip));
2000 	pwr_p = PCIE_NEXUS_PMINFO(dip);
2001 	ASSERT(pwr_p);
2002 	PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_disable: disabling PM\n");
2003 	pwr_p->pwr_func_lvl = PM_LEVEL_D0;
2004 	pwr_p->pwr_flags = PCIE_NO_CHILD_PM;
2005 	return (DDI_SUCCESS);
2006 }
2007 
#ifdef DEBUG
/*
 * Tunable: while zero, messages issued from interrupt context are
 * suppressed; set non-zero to let them through.
 */
int pcieb_dbg_intr_print = 0;

/*
 * DEBUG-only message printer.
 *
 * bit	index into pcieb_debug_sym[] selecting the label printed after the
 *	driver name and instance
 * dip	device the message concerns; when NULL no prefix is printed
 * fmt	printf-style format string followed by its arguments
 *
 * Nothing is printed unless pcieb_dbg_print is set.  From interrupt context
 * output is additionally gated on pcieb_dbg_intr_print.
 */
void
pcieb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...)
{
	va_list ap;

	if (!pcieb_dbg_print)
		return;

	/*
	 * Decide about interrupt-context suppression before emitting
	 * anything, so a suppressed message does not leave a dangling
	 * "driver(inst): sym" prefix with no body on the console.
	 */
	if (servicing_interrupt() && !pcieb_dbg_intr_print)
		return;

	if (dip)
		prom_printf("%s(%d): %s", ddi_driver_name(dip),
		    ddi_get_instance(dip), pcieb_debug_sym[bit]);

	va_start(ap, fmt);
	prom_vprintf(fmt, ap);
	va_end(ap);
}
#endif
2033 
2034 static void
2035 pcieb_id_props(pcieb_devstate_t *pcieb)
2036 {
2037 	uint64_t serialid = 0;	/* 40b field of EUI-64 serial no. register */
2038 	uint16_t cap_ptr;
2039 	uint8_t fic = 0;	/* 1 = first in chassis device */
2040 	pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
2041 	ddi_acc_handle_t config_handle = bus_p->bus_cfg_hdl;
2042 
2043 	/*
2044 	 * Identify first in chassis.  In the special case of a Sun branded
2045 	 * PLX device, it obviously is first in chassis.  Otherwise, in the
2046 	 * general case, look for an Expansion Slot Register and check its
2047 	 * first-in-chassis bit.
2048 	 */
2049 #ifdef	PX_PLX
2050 	uint16_t vendor_id = bus_p->bus_dev_ven_id & 0xFFFF;
2051 	uint16_t device_id = bus_p->bus_dev_ven_id >> 16;
2052 	if ((vendor_id == PXB_VENDOR_SUN) &&
2053 	    ((device_id == PXB_DEVICE_PLX_PCIX) ||
2054 	    (device_id == PXB_DEVICE_PLX_PCIE))) {
2055 		fic = 1;
2056 	}
2057 #endif	/* PX_PLX */
2058 	if ((fic == 0) && ((PCI_CAP_LOCATE(config_handle,
2059 	    PCI_CAP_ID_SLOT_ID, &cap_ptr)) != DDI_FAILURE)) {
2060 		uint8_t esr = PCI_CAP_GET8(config_handle, 0,
2061 		    cap_ptr, PCI_CAP_ID_REGS_OFF);
2062 		if (PCI_CAPSLOT_FIC(esr))
2063 			fic = 1;
2064 	}
2065 
2066 	if ((PCI_CAP_LOCATE(config_handle,
2067 	    PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_SER), &cap_ptr)) != DDI_FAILURE) {
2068 		/* Serialid can be 0 thru a full 40b number */
2069 		serialid = PCI_XCAP_GET32(config_handle, 0,
2070 		    cap_ptr, PCIE_SER_SID_UPPER_DW);
2071 		serialid <<= 32;
2072 		serialid |= PCI_XCAP_GET32(config_handle, 0,
2073 		    cap_ptr, PCIE_SER_SID_LOWER_DW);
2074 	}
2075 
2076 	if (fic)
2077 		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2078 		    "first-in-chassis");
2079 	if (serialid)
2080 		(void) ddi_prop_update_int64(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2081 		    "serialid#", serialid);
2082 }
2083 
2084 static void
2085 pcieb_create_ranges_prop(dev_info_t *dip,
2086     ddi_acc_handle_t config_handle)
2087 {
2088 	uint32_t base, limit;
2089 	ppb_ranges_t	ranges[PCIEB_RANGE_LEN];
2090 	uint8_t io_base_lo, io_limit_lo;
2091 	uint16_t io_base_hi, io_limit_hi, mem_base, mem_limit;
2092 	int i = 0, rangelen = sizeof (ppb_ranges_t)/sizeof (int);
2093 
2094 	io_base_lo = pci_config_get8(config_handle, PCI_BCNF_IO_BASE_LOW);
2095 	io_limit_lo = pci_config_get8(config_handle, PCI_BCNF_IO_LIMIT_LOW);
2096 	io_base_hi = pci_config_get16(config_handle, PCI_BCNF_IO_BASE_HI);
2097 	io_limit_hi = pci_config_get16(config_handle, PCI_BCNF_IO_LIMIT_HI);
2098 	mem_base = pci_config_get16(config_handle, PCI_BCNF_MEM_BASE);
2099 	mem_limit = pci_config_get16(config_handle, PCI_BCNF_MEM_LIMIT);
2100 
2101 	/*
2102 	 * Create ranges for IO space
2103 	 */
2104 	ranges[i].size_low = ranges[i].size_high = 0;
2105 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2106 	ranges[i].child_high = ranges[i].parent_high |=
2107 	    (PCI_REG_REL_M | PCI_ADDR_IO);
2108 	base = PCIEB_16bit_IOADDR(io_base_lo);
2109 	limit = PCIEB_16bit_IOADDR(io_limit_lo);
2110 
2111 	if ((io_base_lo & 0xf) == PCIEB_32BIT_IO) {
2112 		base = PCIEB_LADDR(base, io_base_hi);
2113 	}
2114 	if ((io_limit_lo & 0xf) == PCIEB_32BIT_IO) {
2115 		limit = PCIEB_LADDR(limit, io_limit_hi);
2116 	}
2117 
2118 	if ((io_base_lo & PCIEB_32BIT_IO) && (io_limit_hi > 0)) {
2119 		base = PCIEB_LADDR(base, io_base_hi);
2120 		limit = PCIEB_LADDR(limit, io_limit_hi);
2121 	}
2122 
2123 	/*
2124 	 * Create ranges for 32bit memory space
2125 	 */
2126 	base = PCIEB_32bit_MEMADDR(mem_base);
2127 	limit = PCIEB_32bit_MEMADDR(mem_limit);
2128 	ranges[i].size_low = ranges[i].size_high = 0;
2129 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2130 	ranges[i].child_high = ranges[i].parent_high |=
2131 	    (PCI_REG_REL_M | PCI_ADDR_MEM32);
2132 	ranges[i].child_low = ranges[i].parent_low = base;
2133 	if (limit >= base) {
2134 		ranges[i].size_low = limit - base + PCIEB_MEMGRAIN;
2135 		i++;
2136 	}
2137 
2138 	if (i) {
2139 		(void) ndi_prop_update_int_array(DDI_DEV_T_NONE, dip, "ranges",
2140 		    (int *)ranges, i * rangelen);
2141 	}
2142 }
2143 
2144 /*
2145  * For PCI and PCI-X devices including PCIe2PCI bridge, initialize
2146  * cache-line-size and latency timer configuration registers.
2147  */
2148 void
2149 pcieb_set_pci_perf_parameters(dev_info_t *dip, ddi_acc_handle_t cfg_hdl)
2150 {
2151 	uint_t	n;
2152 
2153 	/* Initialize cache-line-size configuration register if needed */
2154 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2155 	    "cache-line-size", 0) == 0) {
2156 		pci_config_put8(cfg_hdl, PCI_CONF_CACHE_LINESZ,
2157 		    PCIEB_CACHE_LINE_SIZE);
2158 		n = pci_config_get8(cfg_hdl, PCI_CONF_CACHE_LINESZ);
2159 		if (n != 0) {
2160 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2161 			    "cache-line-size", n);
2162 		}
2163 	}
2164 
2165 	/* Initialize latency timer configuration registers if needed */
2166 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2167 	    "latency-timer", 0) == 0) {
2168 		uchar_t	min_gnt, latency_timer;
2169 		uchar_t header_type;
2170 
2171 		/* Determine the configuration header type */
2172 		header_type = pci_config_get8(cfg_hdl, PCI_CONF_HEADER);
2173 
2174 		if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) {
2175 			latency_timer = PCIEB_LATENCY_TIMER;
2176 			pci_config_put8(cfg_hdl, PCI_BCNF_LATENCY_TIMER,
2177 			    latency_timer);
2178 		} else {
2179 			min_gnt = pci_config_get8(cfg_hdl, PCI_CONF_MIN_G);
2180 			latency_timer = min_gnt * 8;
2181 		}
2182 
2183 		pci_config_put8(cfg_hdl, PCI_CONF_LATENCY_TIMER,
2184 		    latency_timer);
2185 		n = pci_config_get8(cfg_hdl, PCI_CONF_LATENCY_TIMER);
2186 		if (n != 0) {
2187 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2188 			    "latency-timer", n);
2189 		}
2190 	}
2191 }
2192