xref: /titanic_41/usr/src/uts/i86pc/io/pciex/npe.c (revision 1babaf948dd28d81d79cf3ec089d6edc111ed4a8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
29  */
30 
31 /*
32  *	Host to PCI-Express local bus driver
33  */
34 
35 #include <sys/conf.h>
36 #include <sys/modctl.h>
37 #include <sys/file.h>
38 #include <sys/pci_impl.h>
39 #include <sys/pcie_impl.h>
40 #include <sys/sysmacros.h>
41 #include <sys/ddi_intr.h>
42 #include <sys/sunndi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ddifm.h>
45 #include <sys/ndifm.h>
46 #include <sys/fm/util.h>
47 #include <sys/hotplug/pci/pcie_hp.h>
48 #include <io/pci/pci_tools_ext.h>
49 #include <io/pci/pci_common.h>
50 #include <io/pciex/pcie_nvidia.h>
51 
52 /*
53  * Helper Macros
54  */
/*
 * NPE_IS_HANDLE_FOR_STDCFG_ACC(hp) is true when hp is a non-NULL access
 * handle whose platform-private ddi_acc_impl_t has DDI_ACCATTR_CONFIG_SPACE
 * set and DDI_ACCATTR_CPU_VADDR clear in ahi_acc_attr.  npe_bus_map() uses
 * it on unmap to recognize standard config space mappings.
 * Note that hp is evaluated more than once.
 */
#define	NPE_IS_HANDLE_FOR_STDCFG_ACC(hp)				\
	((hp) != NULL &&						\
	((ddi_acc_hdl_t *)(hp))->ah_platform_private != NULL &&		\
	((((ddi_acc_impl_t *)						\
	((ddi_acc_hdl_t *)(hp))->ah_platform_private)->ahi_acc_attr) &	\
	(DDI_ACCATTR_CPU_VADDR | DDI_ACCATTR_CONFIG_SPACE)) ==		\
	DDI_ACCATTR_CONFIG_SPACE)
62 
63 /*
64  * Bus Operation functions
65  */
66 static int	npe_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *,
67 		    off_t, off_t, caddr_t *);
68 static int	npe_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
69 		    void *, void *);
70 static int	npe_intr_ops(dev_info_t *, dev_info_t *, ddi_intr_op_t,
71 		    ddi_intr_handle_impl_t *, void *);
72 static int	npe_fm_init(dev_info_t *, dev_info_t *, int,
73 		    ddi_iblock_cookie_t *);
74 
75 static int	npe_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *);
76 
/*
 * Disable URs and Received MA for all PCIe devices.  Until x86 SW is changed so
 * that random drivers do not do PIO accesses on devices that it does not own,
 * these error bits must be disabled.  SERR must also be disabled if URs have
 * been masked.
 *
 * These values are OR-ed into the corresponding pcie AER masks each time
 * npe_initchild() runs.
 */
/* uncorrectable error mask bits: Unsupported Request by default */
uint32_t	npe_aer_uce_mask = PCIE_AER_UCE_UR;
/* correctable error mask bits: none added by default */
uint32_t	npe_aer_ce_mask = 0;
/* secondary uncorrectable error mask bits: Received MA by default */
uint32_t	npe_aer_suce_mask = PCIE_AER_SUCE_RCVD_MA;
86 
87 struct bus_ops npe_bus_ops = {
88 	BUSO_REV,
89 	npe_bus_map,
90 	NULL,
91 	NULL,
92 	NULL,
93 	i_ddi_map_fault,
94 	NULL,
95 	ddi_dma_allochdl,
96 	ddi_dma_freehdl,
97 	ddi_dma_bindhdl,
98 	ddi_dma_unbindhdl,
99 	ddi_dma_flush,
100 	ddi_dma_win,
101 	ddi_dma_mctl,
102 	npe_ctlops,
103 	ddi_bus_prop_op,
104 	0,			/* (*bus_get_eventcookie)();	*/
105 	0,			/* (*bus_add_eventcall)();	*/
106 	0,			/* (*bus_remove_eventcall)();	*/
107 	0,			/* (*bus_post_event)();		*/
108 	0,			/* (*bus_intr_ctl)(); */
109 	0,			/* (*bus_config)(); */
110 	0,			/* (*bus_unconfig)(); */
111 	npe_fm_init,		/* (*bus_fm_init)(); */
112 	NULL,			/* (*bus_fm_fini)(); */
113 	NULL,			/* (*bus_fm_access_enter)(); */
114 	NULL,			/* (*bus_fm_access_exit)(); */
115 	NULL,			/* (*bus_power)(); */
116 	npe_intr_ops,		/* (*bus_intr_op)(); */
117 	pcie_hp_common_ops	/* (*bus_hp_op)(); */
118 };
119 
120 static int	npe_open(dev_t *, int, int, cred_t *);
121 static int	npe_close(dev_t, int, int, cred_t *);
122 static int	npe_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
123 
/*
 * Character/block entry points for the nexus minor nodes.  Only open, close
 * and ioctl are implemented by this driver; property lookups go through
 * pcie_prop_op and the remaining entries are the nodev/nochpoll stubs.
 */
struct cb_ops npe_cb_ops = {
	npe_open,			/* open */
	npe_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	npe_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	pcie_prop_op,			/* cb_prop_op */
	NULL,				/* streamtab */
	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
144 
145 
146 /*
147  * Device Node Operation functions
148  */
149 static int	npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
150 static int	npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
151 static int	npe_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
152 
/*
 * Device operations vector handed to the module framework through modldrv.
 * It ties together the attach/detach/info entry points with the cb_ops and
 * bus_ops tables defined above.
 */
struct dev_ops npe_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt  */
	npe_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	npe_attach,		/* attach */
	npe_detach,		/* detach */
	nulldev,		/* reset */
	&npe_cb_ops,		/* driver operations */
	&npe_bus_ops,		/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
167 
/*
 * Internal routines in support of particular npe_ctlops:
 * npe_initchild() serves DDI_CTLOPS_INITCHILD and npe_removechild()
 * serves DDI_CTLOPS_UNINITCHILD.
 */
static int npe_removechild(dev_info_t *child);
static int npe_initchild(dev_info_t *child);
173 
/*
 * External support routines (defined outside this file).
 */
extern void	npe_query_acpi_mcfg(dev_info_t *dip);
extern void	npe_ck804_fix_aer_ptr(ddi_acc_handle_t cfg_hdl);
extern int	npe_disable_empty_bridges_workaround(dev_info_t *child);
extern void	npe_nvidia_error_workaround(ddi_acc_handle_t cfg_hdl);
extern void	npe_intel_error_workaround(ddi_acc_handle_t cfg_hdl);
extern boolean_t npe_is_mmcfg_supported(dev_info_t *dip);
extern void	npe_enable_htmsi_children(dev_info_t *dip);
extern int	npe_save_htconfig_children(dev_info_t *dip);
extern int	npe_restore_htconfig_children(dev_info_t *dip);
186 
187 /*
188  * Module linkage information for the kernel.
189  */
/* Driver linkage: describes this module as a device driver using npe_ops. */
static struct modldrv modldrv = {
	&mod_driverops,				/* Type of module */
	"Host to PCIe nexus driver",		/* Name of module */
	&npe_ops,				/* driver ops */
};
195 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info();
 * its only linkage element is modldrv, followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
201 
/*
 * Save minimal state: anchor for the per-instance pci_state_t soft state,
 * initialized in _init() and torn down in _fini().
 */
void *npe_statep;
204 
205 int
206 _init(void)
207 {
208 	int e;
209 
210 	/*
211 	 * Initialize per-pci bus soft state pointer.
212 	 */
213 	e = ddi_soft_state_init(&npe_statep, sizeof (pci_state_t), 1);
214 	if (e != 0)
215 		return (e);
216 
217 	if ((e = mod_install(&modlinkage)) != 0)
218 		ddi_soft_state_fini(&npe_statep);
219 
220 	return (e);
221 }
222 
223 
224 int
225 _fini(void)
226 {
227 	int rc;
228 
229 	rc = mod_remove(&modlinkage);
230 	if (rc != 0)
231 		return (rc);
232 
233 	ddi_soft_state_fini(&npe_statep);
234 	return (rc);
235 }
236 
237 
238 int
239 _info(struct modinfo *modinfop)
240 {
241 	return (mod_info(&modlinkage, modinfop));
242 }
243 
244 /*ARGSUSED*/
245 static int
246 npe_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
247 {
248 	minor_t		minor = getminor((dev_t)arg);
249 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
250 	pci_state_t	*pcip = ddi_get_soft_state(npe_statep, instance);
251 	int		ret = DDI_SUCCESS;
252 
253 	switch (cmd) {
254 	case DDI_INFO_DEVT2INSTANCE:
255 		*result = (void *)(intptr_t)instance;
256 		break;
257 	case DDI_INFO_DEVT2DEVINFO:
258 		if (pcip == NULL) {
259 			ret = DDI_FAILURE;
260 			break;
261 		}
262 
263 		*result = (void *)pcip->pci_dip;
264 		break;
265 	default:
266 		ret = DDI_FAILURE;
267 		break;
268 	}
269 
270 	return (ret);
271 }
272 
273 /*ARGSUSED*/
274 static int
275 npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
276 {
277 	int		instance = ddi_get_instance(devi);
278 	pci_state_t	*pcip = NULL;
279 
280 	if (cmd == DDI_RESUME) {
281 		/*
282 		 * the system might still be able to resume even if this fails
283 		 */
284 		(void) npe_restore_htconfig_children(devi);
285 		return (DDI_SUCCESS);
286 	}
287 
288 	/*
289 	 * We must do this here in order to ensure that all top level devices
290 	 * get their HyperTransport MSI mapping regs programmed first.
291 	 * "Memory controller" and "hostbridge" class devices are leaf devices
292 	 * that may affect MSI translation functionality for devices
293 	 * connected to the same link/bus.
294 	 *
295 	 * This will also program HT MSI mapping registers on root buses
296 	 * devices (basically sitting on an HT bus) that are not dependent
297 	 * on the aforementioned HT devices for MSI translation.
298 	 */
299 	npe_enable_htmsi_children(devi);
300 
301 	if (ddi_prop_update_string(DDI_DEV_T_NONE, devi, "device_type",
302 	    "pciex") != DDI_PROP_SUCCESS) {
303 		cmn_err(CE_WARN, "npe:  'device_type' prop create failed");
304 	}
305 
306 	if (ddi_soft_state_zalloc(npe_statep, instance) == DDI_SUCCESS)
307 		pcip = ddi_get_soft_state(npe_statep, instance);
308 
309 	if (pcip == NULL)
310 		return (DDI_FAILURE);
311 
312 	pcip->pci_dip = devi;
313 	pcip->pci_soft_state = PCI_SOFT_STATE_CLOSED;
314 
315 	if (pcie_init(devi, NULL) != DDI_SUCCESS)
316 		goto fail1;
317 
318 	/* Second arg: initialize for pci_express root nexus */
319 	if (pcitool_init(devi, B_TRUE) != DDI_SUCCESS)
320 		goto fail2;
321 
322 	pcip->pci_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
323 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
324 	ddi_fm_init(devi, &pcip->pci_fmcap, &pcip->pci_fm_ibc);
325 
326 	if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) {
327 		ddi_fm_handler_register(devi, npe_fm_callback, NULL);
328 	}
329 
330 	PCIE_DIP2PFD(devi) = kmem_zalloc(sizeof (pf_data_t), KM_SLEEP);
331 	pcie_rc_init_pfd(devi, PCIE_DIP2PFD(devi));
332 
333 	npe_query_acpi_mcfg(devi);
334 	ddi_report_dev(devi);
335 	pcie_fab_init_bus(devi, PCIE_BUS_FINAL);
336 
337 	return (DDI_SUCCESS);
338 
339 fail2:
340 	(void) pcie_uninit(devi);
341 fail1:
342 	pcie_rc_fini_bus(devi);
343 	ddi_soft_state_free(npe_statep, instance);
344 
345 	return (DDI_FAILURE);
346 }
347 
348 /*ARGSUSED*/
349 static int
350 npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
351 {
352 	int instance = ddi_get_instance(devi);
353 	pci_state_t *pcip;
354 
355 	pcip = ddi_get_soft_state(npe_statep, ddi_get_instance(devi));
356 
357 	switch (cmd) {
358 	case DDI_DETACH:
359 		pcie_fab_fini_bus(devi, PCIE_BUS_INITIAL);
360 
361 		/* Uninitialize pcitool support. */
362 		pcitool_uninit(devi);
363 
364 		if (pcie_uninit(devi) != DDI_SUCCESS)
365 			return (DDI_FAILURE);
366 
367 		if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE)
368 			ddi_fm_handler_unregister(devi);
369 
370 		pcie_rc_fini_pfd(PCIE_DIP2PFD(devi));
371 		kmem_free(PCIE_DIP2PFD(devi), sizeof (pf_data_t));
372 
373 		ddi_fm_fini(devi);
374 		ddi_soft_state_free(npe_statep, instance);
375 		return (DDI_SUCCESS);
376 
377 	case DDI_SUSPEND:
378 		/*
379 		 * the system might still be able to suspend/resume even if
380 		 * this fails
381 		 */
382 		(void) npe_save_htconfig_children(devi);
383 		return (DDI_SUCCESS);
384 	default:
385 		return (DDI_FAILURE);
386 	}
387 }
388 
389 /*
390  * Configure the access handle for standard configuration space
391  * access (see pci_fm_acc_setup for code that initializes the
392  * access-function pointers).
393  */
394 static int
395 npe_setup_std_pcicfg_acc(dev_info_t *rdip, ddi_map_req_t *mp,
396     ddi_acc_hdl_t *hp, off_t offset, off_t len)
397 {
398 	int ret;
399 
400 	if ((ret = pci_fm_acc_setup(hp, offset, len)) ==
401 	    DDI_SUCCESS) {
402 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
403 		    mp->map_handlep->ah_acc.devacc_attr_access
404 		    != DDI_DEFAULT_ACC) {
405 			ndi_fmc_insert(rdip, ACC_HANDLE,
406 			    (void *)mp->map_handlep, NULL);
407 		}
408 	}
409 	return (ret);
410 }
411 
412 static int
413 npe_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
414     off_t offset, off_t len, caddr_t *vaddrp)
415 {
416 	int 		rnumber;
417 	int		length;
418 	int		space;
419 	ddi_acc_impl_t	*ap;
420 	ddi_acc_hdl_t	*hp;
421 	ddi_map_req_t	mr;
422 	pci_regspec_t	pci_reg;
423 	pci_regspec_t	*pci_rp;
424 	struct regspec	reg;
425 	pci_acc_cfblk_t	*cfp;
426 	int		retval;
427 	int64_t		*ecfginfo;
428 	uint_t		nelem;
429 
430 	mr = *mp; /* Get private copy of request */
431 	mp = &mr;
432 
433 	/*
434 	 * check for register number
435 	 */
436 	switch (mp->map_type) {
437 	case DDI_MT_REGSPEC:
438 		pci_reg = *(pci_regspec_t *)(mp->map_obj.rp);
439 		pci_rp = &pci_reg;
440 		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
441 			return (DDI_FAILURE);
442 		break;
443 	case DDI_MT_RNUMBER:
444 		rnumber = mp->map_obj.rnumber;
445 		/*
446 		 * get ALL "reg" properties for dip, select the one of
447 		 * of interest. In x86, "assigned-addresses" property
448 		 * is identical to the "reg" property, so there is no
449 		 * need to cross check the two to determine the physical
450 		 * address of the registers.
451 		 * This routine still performs some validity checks to
452 		 * make sure that everything is okay.
453 		 */
454 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
455 		    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp,
456 		    (uint_t *)&length) != DDI_PROP_SUCCESS)
457 			return (DDI_FAILURE);
458 
459 		/*
460 		 * validate the register number.
461 		 */
462 		length /= (sizeof (pci_regspec_t) / sizeof (int));
463 		if (rnumber >= length) {
464 			ddi_prop_free(pci_rp);
465 			return (DDI_FAILURE);
466 		}
467 
468 		/*
469 		 * copy the required entry.
470 		 */
471 		pci_reg = pci_rp[rnumber];
472 
473 		/*
474 		 * free the memory allocated by ddi_prop_lookup_int_array
475 		 */
476 		ddi_prop_free(pci_rp);
477 
478 		pci_rp = &pci_reg;
479 		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
480 			return (DDI_FAILURE);
481 		mp->map_type = DDI_MT_REGSPEC;
482 		break;
483 	default:
484 		return (DDI_ME_INVAL);
485 	}
486 
487 	space = pci_rp->pci_phys_hi & PCI_REG_ADDR_M;
488 
489 	/*
490 	 * check for unmap and unlock of address space
491 	 */
492 	if ((mp->map_op == DDI_MO_UNMAP) || (mp->map_op == DDI_MO_UNLOCK)) {
493 		switch (space) {
494 		case PCI_ADDR_IO:
495 			reg.regspec_bustype = 1;
496 			break;
497 
498 		case PCI_ADDR_CONFIG:
499 			/*
500 			 * If this is an unmap/unlock of a standard config
501 			 * space mapping (memory-mapped config space mappings
502 			 * would have the DDI_ACCATTR_CPU_VADDR bit set in the
503 			 * acc_attr), undo that setup here.
504 			 */
505 			if (NPE_IS_HANDLE_FOR_STDCFG_ACC(mp->map_handlep)) {
506 
507 				if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
508 				    mp->map_handlep->ah_acc.devacc_attr_access
509 				    != DDI_DEFAULT_ACC) {
510 					ndi_fmc_remove(rdip, ACC_HANDLE,
511 					    (void *)mp->map_handlep);
512 				}
513 				return (DDI_SUCCESS);
514 			}
515 
516 			pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
517 
518 			/* FALLTHROUGH */
519 		case PCI_ADDR_MEM64:
520 			/*
521 			 * MEM64 requires special treatment on map, to check
522 			 * that the device is below 4G.  On unmap, however,
523 			 * we can assume that everything is OK... the map
524 			 * must have succeeded.
525 			 */
526 			/* FALLTHROUGH */
527 		case PCI_ADDR_MEM32:
528 			reg.regspec_bustype = 0;
529 			break;
530 
531 		default:
532 			return (DDI_FAILURE);
533 		}
534 
535 		/*
536 		 * Adjust offset and length
537 		 * A non-zero length means override the one in the regspec.
538 		 */
539 		pci_rp->pci_phys_low += (uint_t)offset;
540 		if (len != 0)
541 			pci_rp->pci_size_low = len;
542 
543 		reg.regspec_addr = pci_rp->pci_phys_low;
544 		reg.regspec_size = pci_rp->pci_size_low;
545 
546 		mp->map_obj.rp = &reg;
547 		retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
548 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
549 		    mp->map_handlep->ah_acc.devacc_attr_access !=
550 		    DDI_DEFAULT_ACC) {
551 			ndi_fmc_remove(rdip, ACC_HANDLE,
552 			    (void *)mp->map_handlep);
553 		}
554 		return (retval);
555 
556 	}
557 
558 	/* check for user mapping request - not legal for Config */
559 	if (mp->map_op == DDI_MO_MAP_HANDLE && space == PCI_ADDR_CONFIG) {
560 		cmn_err(CE_NOTE, "npe: Config mapping request from user\n");
561 		return (DDI_FAILURE);
562 	}
563 
564 
565 	/*
566 	 * Note that pci_fm_acc_setup() is called to serve two purposes
567 	 * i) enable legacy PCI I/O style config space access
568 	 * ii) register with FMA
569 	 */
570 	if (space == PCI_ADDR_CONFIG) {
571 
572 		/* Can't map config space without a handle */
573 		hp = (ddi_acc_hdl_t *)mp->map_handlep;
574 		if (hp == NULL)
575 			return (DDI_FAILURE);
576 
577 		/* record the device address for future reference */
578 		cfp = (pci_acc_cfblk_t *)&hp->ah_bus_private;
579 		cfp->c_busnum = PCI_REG_BUS_G(pci_rp->pci_phys_hi);
580 		cfp->c_devnum = PCI_REG_DEV_G(pci_rp->pci_phys_hi);
581 		cfp->c_funcnum = PCI_REG_FUNC_G(pci_rp->pci_phys_hi);
582 
583 		*vaddrp = (caddr_t)offset;
584 
585 		/* Check if MMCFG is supported */
586 		if (!npe_is_mmcfg_supported(rdip)) {
587 			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
588 			    offset, len));
589 		}
590 
591 
592 		if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, rdip, 0,
593 		    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {
594 
595 			if (nelem != 4 ||
596 			    cfp->c_busnum < ecfginfo[2] ||
597 			    cfp->c_busnum > ecfginfo[3]) {
598 				/*
599 				 * Invalid property or Doesn't contain the
600 				 * requested bus; fall back to standard
601 				 * (I/O-based) config access.
602 				 */
603 				ddi_prop_free(ecfginfo);
604 				return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
605 				    offset, len));
606 			} else {
607 				pci_rp->pci_phys_low = ecfginfo[0];
608 
609 				ddi_prop_free(ecfginfo);
610 
611 				pci_rp->pci_phys_low += ((cfp->c_busnum << 20) |
612 				    (cfp->c_devnum) << 15 |
613 				    (cfp->c_funcnum << 12));
614 
615 				pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
616 			}
617 		} else {
618 			/*
619 			 * Couldn't find the MMCFG property -- fall back to
620 			 * standard config access
621 			 */
622 			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
623 			    offset, len));
624 		}
625 	}
626 
627 	length = pci_rp->pci_size_low;
628 
629 	/*
630 	 * range check
631 	 */
632 	if ((offset >= length) || (len > length) || (offset + len > length))
633 		return (DDI_FAILURE);
634 
635 	/*
636 	 * Adjust offset and length
637 	 * A non-zero length means override the one in the regspec.
638 	 */
639 	pci_rp->pci_phys_low += (uint_t)offset;
640 	if (len != 0)
641 		pci_rp->pci_size_low = len;
642 
643 	/*
644 	 * convert the pci regsec into the generic regspec used by the
645 	 * parent root nexus driver.
646 	 */
647 	switch (space) {
648 	case PCI_ADDR_IO:
649 		reg.regspec_bustype = 1;
650 		break;
651 	case PCI_ADDR_CONFIG:
652 	case PCI_ADDR_MEM64:
653 		/*
654 		 * We can't handle 64-bit devices that are mapped above
655 		 * 4G or that are larger than 4G.
656 		 */
657 		if (pci_rp->pci_phys_mid != 0 || pci_rp->pci_size_hi != 0)
658 			return (DDI_FAILURE);
659 		/*
660 		 * Other than that, we can treat them as 32-bit mappings
661 		 */
662 		/* FALLTHROUGH */
663 	case PCI_ADDR_MEM32:
664 		reg.regspec_bustype = 0;
665 		break;
666 	default:
667 		return (DDI_FAILURE);
668 	}
669 
670 	reg.regspec_addr = pci_rp->pci_phys_low;
671 	reg.regspec_size = pci_rp->pci_size_low;
672 
673 	mp->map_obj.rp = &reg;
674 	retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
675 	if (retval == DDI_SUCCESS) {
676 		/*
677 		 * For config space gets force use of cautious access routines.
678 		 * These will handle default and protected mode accesses too.
679 		 */
680 		if (space == PCI_ADDR_CONFIG) {
681 			ap = (ddi_acc_impl_t *)mp->map_handlep;
682 			ap->ahi_acc_attr &= ~DDI_ACCATTR_DIRECT;
683 			ap->ahi_acc_attr |= DDI_ACCATTR_CONFIG_SPACE;
684 			ap->ahi_get8 = i_ddi_caut_get8;
685 			ap->ahi_get16 = i_ddi_caut_get16;
686 			ap->ahi_get32 = i_ddi_caut_get32;
687 			ap->ahi_get64 = i_ddi_caut_get64;
688 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
689 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
690 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
691 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
692 		}
693 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
694 		    mp->map_handlep->ah_acc.devacc_attr_access !=
695 		    DDI_DEFAULT_ACC) {
696 			ndi_fmc_insert(rdip, ACC_HANDLE,
697 			    (void *)mp->map_handlep, NULL);
698 		}
699 	}
700 	return (retval);
701 }
702 
703 
704 
705 /*ARGSUSED*/
706 static int
707 npe_ctlops(dev_info_t *dip, dev_info_t *rdip,
708 	ddi_ctl_enum_t ctlop, void *arg, void *result)
709 {
710 	int		rn;
711 	int		totreg;
712 	uint_t		reglen;
713 	pci_regspec_t	*drv_regp;
714 	struct attachspec *asp;
715 	struct detachspec *dsp;
716 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep,
717 	    ddi_get_instance(dip));
718 
719 	switch (ctlop) {
720 	case DDI_CTLOPS_REPORTDEV:
721 		if (rdip == (dev_info_t *)0)
722 			return (DDI_FAILURE);
723 		cmn_err(CE_CONT, "?PCI Express-device: %s@%s, %s%d\n",
724 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
725 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
726 		return (DDI_SUCCESS);
727 
728 	case DDI_CTLOPS_INITCHILD:
729 		return (npe_initchild((dev_info_t *)arg));
730 
731 	case DDI_CTLOPS_UNINITCHILD:
732 		return (npe_removechild((dev_info_t *)arg));
733 
734 	case DDI_CTLOPS_SIDDEV:
735 		return (DDI_SUCCESS);
736 
737 	case DDI_CTLOPS_REGSIZE:
738 	case DDI_CTLOPS_NREGS:
739 		if (rdip == (dev_info_t *)0)
740 			return (DDI_FAILURE);
741 
742 		*(int *)result = 0;
743 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
744 		    DDI_PROP_DONTPASS, "reg", (int **)&drv_regp,
745 		    &reglen) != DDI_PROP_SUCCESS) {
746 			return (DDI_FAILURE);
747 		}
748 
749 		totreg = (reglen * sizeof (int)) / sizeof (pci_regspec_t);
750 		if (ctlop == DDI_CTLOPS_NREGS)
751 			*(int *)result = totreg;
752 		else if (ctlop == DDI_CTLOPS_REGSIZE) {
753 			rn = *(int *)arg;
754 			if (rn >= totreg) {
755 				ddi_prop_free(drv_regp);
756 				return (DDI_FAILURE);
757 			}
758 			*(off_t *)result = drv_regp[rn].pci_size_low;
759 		}
760 		ddi_prop_free(drv_regp);
761 
762 		return (DDI_SUCCESS);
763 
764 	case DDI_CTLOPS_POWER:
765 	{
766 		power_req_t	*reqp = (power_req_t *)arg;
767 		/*
768 		 * We currently understand reporting of PCI_PM_IDLESPEED
769 		 * capability. Everything else is passed up.
770 		 */
771 		if ((reqp->request_type == PMR_REPORT_PMCAP) &&
772 		    (reqp->req.report_pmcap_req.cap ==  PCI_PM_IDLESPEED))
773 			return (DDI_SUCCESS);
774 
775 		break;
776 	}
777 
778 	case DDI_CTLOPS_PEEK:
779 	case DDI_CTLOPS_POKE:
780 		return (pci_common_peekpoke(dip, rdip, ctlop, arg, result));
781 
782 	/* X86 systems support PME wakeup from suspended state */
783 	case DDI_CTLOPS_ATTACH:
784 		if (!pcie_is_child(dip, rdip))
785 			return (DDI_SUCCESS);
786 
787 		asp = (struct attachspec *)arg;
788 		if ((asp->when == DDI_POST) && (asp->result == DDI_SUCCESS)) {
789 			pf_init(rdip, (void *)pci_p->pci_fm_ibc, asp->cmd);
790 			(void) pcie_postattach_child(rdip);
791 		}
792 
793 		/* only do this for immediate children */
794 		if (asp->cmd == DDI_RESUME && asp->when == DDI_PRE &&
795 		    ddi_get_parent(rdip) == dip)
796 			if (pci_pre_resume(rdip) != DDI_SUCCESS) {
797 				/* Not good, better stop now. */
798 				cmn_err(CE_PANIC,
799 				    "Couldn't pre-resume device %p",
800 				    (void *) dip);
801 				/* NOTREACHED */
802 			}
803 
804 		return (DDI_SUCCESS);
805 
806 	case DDI_CTLOPS_DETACH:
807 		if (!pcie_is_child(dip, rdip))
808 			return (DDI_SUCCESS);
809 
810 		dsp = (struct detachspec *)arg;
811 
812 		if (dsp->when == DDI_PRE)
813 			pf_fini(rdip, dsp->cmd);
814 
815 		/* only do this for immediate children */
816 		if (dsp->cmd == DDI_SUSPEND && dsp->when == DDI_POST &&
817 		    ddi_get_parent(rdip) == dip)
818 			if (pci_post_suspend(rdip) != DDI_SUCCESS)
819 				return (DDI_FAILURE);
820 
821 		return (DDI_SUCCESS);
822 
823 	default:
824 		break;
825 	}
826 
827 	return (ddi_ctlops(dip, rdip, ctlop, arg, result));
828 
829 }
830 
831 
832 /*
833  * npe_intr_ops
834  */
835 static int
836 npe_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
837     ddi_intr_handle_impl_t *hdlp, void *result)
838 {
839 	return (pci_common_intr_ops(pdip, rdip, intr_op, hdlp, result));
840 }
841 
842 
843 static int
844 npe_initchild(dev_info_t *child)
845 {
846 	char		name[80];
847 	pcie_bus_t	*bus_p;
848 	uint32_t	regs;
849 	ddi_acc_handle_t	cfg_hdl;
850 
851 	/*
852 	 * Do not bind drivers to empty bridges.
853 	 * Fail above, if the bridge is found to be hotplug capable
854 	 */
855 	if (npe_disable_empty_bridges_workaround(child) == 1)
856 		return (DDI_FAILURE);
857 
858 	if (pci_common_name_child(child, name, 80) != DDI_SUCCESS)
859 		return (DDI_FAILURE);
860 
861 	ddi_set_name_addr(child, name);
862 
863 	/*
864 	 * Pseudo nodes indicate a prototype node with per-instance
865 	 * properties to be merged into the real h/w device node.
866 	 * The interpretation of the unit-address is DD[,F]
867 	 * where DD is the device id and F is the function.
868 	 */
869 	if (ndi_dev_is_persistent_node(child) == 0) {
870 		extern int pci_allow_pseudo_children;
871 
872 		ddi_set_parent_data(child, NULL);
873 
874 		/*
875 		 * Try to merge the properties from this prototype
876 		 * node into real h/w nodes.
877 		 */
878 		if (ndi_merge_node(child, pci_common_name_child) ==
879 		    DDI_SUCCESS) {
880 			/*
881 			 * Merged ok - return failure to remove the node.
882 			 */
883 			ddi_set_name_addr(child, NULL);
884 			return (DDI_FAILURE);
885 		}
886 
887 		/* workaround for DDIVS to run under PCI Express */
888 		if (pci_allow_pseudo_children) {
889 			/*
890 			 * If the "interrupts" property doesn't exist,
891 			 * this must be the ddivs no-intr case, and it returns
892 			 * DDI_SUCCESS instead of DDI_FAILURE.
893 			 */
894 			if (ddi_prop_get_int(DDI_DEV_T_ANY, child,
895 			    DDI_PROP_DONTPASS, "interrupts", -1) == -1)
896 				return (DDI_SUCCESS);
897 			/*
898 			 * Create the ddi_parent_private_data for a pseudo
899 			 * child.
900 			 */
901 			pci_common_set_parent_private_data(child);
902 			return (DDI_SUCCESS);
903 		}
904 
905 		/*
906 		 * The child was not merged into a h/w node,
907 		 * but there's not much we can do with it other
908 		 * than return failure to cause the node to be removed.
909 		 */
910 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
911 		    ddi_get_name(child), ddi_get_name_addr(child),
912 		    ddi_get_name(child));
913 		ddi_set_name_addr(child, NULL);
914 		return (DDI_NOT_WELL_FORMED);
915 	}
916 
917 	if (ddi_prop_get_int(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
918 	    "interrupts", -1) != -1)
919 		pci_common_set_parent_private_data(child);
920 	else
921 		ddi_set_parent_data(child, NULL);
922 
923 	/* Disable certain errors on PCIe drivers for x86 platforms */
924 	regs = pcie_get_aer_uce_mask() | npe_aer_uce_mask;
925 	pcie_set_aer_uce_mask(regs);
926 	regs = pcie_get_aer_ce_mask() | npe_aer_ce_mask;
927 	pcie_set_aer_ce_mask(regs);
928 	regs = pcie_get_aer_suce_mask() | npe_aer_suce_mask;
929 	pcie_set_aer_suce_mask(regs);
930 
931 	/*
932 	 * If URs are disabled, mask SERRs as well, otherwise the system will
933 	 * still be notified of URs
934 	 */
935 	if (npe_aer_uce_mask & PCIE_AER_UCE_UR)
936 		pcie_set_serr_mask(1);
937 
938 	if (pci_config_setup(child, &cfg_hdl) == DDI_SUCCESS) {
939 		npe_ck804_fix_aer_ptr(cfg_hdl);
940 		npe_nvidia_error_workaround(cfg_hdl);
941 		npe_intel_error_workaround(cfg_hdl);
942 		pci_config_teardown(&cfg_hdl);
943 	}
944 
945 	bus_p = PCIE_DIP2BUS(child);
946 	if (bus_p) {
947 		uint16_t device_id = (uint16_t)(bus_p->bus_dev_ven_id >> 16);
948 		uint16_t vendor_id = (uint16_t)(bus_p->bus_dev_ven_id & 0xFFFF);
949 		uint16_t rev_id = bus_p->bus_rev_id;
950 
951 		/* Disable AER for certain NVIDIA Chipsets */
952 		if ((vendor_id == NVIDIA_VENDOR_ID) &&
953 		    (device_id == NVIDIA_CK804_DEVICE_ID) &&
954 		    (rev_id < NVIDIA_CK804_AER_VALID_REVID))
955 			bus_p->bus_aer_off = 0;
956 
957 		pcie_init_dom(child);
958 		(void) pcie_initchild(child);
959 	}
960 
961 	return (DDI_SUCCESS);
962 }
963 
964 
/*
 * Handler for DDI_CTLOPS_UNINITCHILD: undo the pcie child setup, clear the
 * unit address and strip minor nodes and properties from the node.
 * Always returns DDI_SUCCESS.
 */
static int
npe_removechild(dev_info_t *dip)
{
	pcie_uninitchild(dip);

	ddi_set_name_addr(dip, NULL);

	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	ddi_remove_minor_node(dip, NULL);

	ddi_prop_remove_all(dip);

	return (DDI_SUCCESS);
}
981 
982 static int
983 npe_open(dev_t *devp, int flags, int otyp, cred_t *credp)
984 {
985 	minor_t		minor = getminor(*devp);
986 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
987 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
988 	int	rv;
989 
990 	/*
991 	 * Make sure the open is for the right file type.
992 	 */
993 	if (otyp != OTYP_CHR)
994 		return (EINVAL);
995 
996 	if (pci_p == NULL)
997 		return (ENXIO);
998 
999 	mutex_enter(&pci_p->pci_mutex);
1000 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1001 	case PCI_TOOL_REG_MINOR_NUM:
1002 	case PCI_TOOL_INTR_MINOR_NUM:
1003 		break;
1004 	default:
1005 		/* Handle devctl ioctls */
1006 		rv = pcie_open(pci_p->pci_dip, devp, flags, otyp, credp);
1007 		mutex_exit(&pci_p->pci_mutex);
1008 		return (rv);
1009 	}
1010 
1011 	/* Handle pcitool ioctls */
1012 	if (flags & FEXCL) {
1013 		if (pci_p->pci_soft_state != PCI_SOFT_STATE_CLOSED) {
1014 			mutex_exit(&pci_p->pci_mutex);
1015 			cmn_err(CE_NOTE, "npe_open: busy");
1016 			return (EBUSY);
1017 		}
1018 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
1019 	} else {
1020 		if (pci_p->pci_soft_state == PCI_SOFT_STATE_OPEN_EXCL) {
1021 			mutex_exit(&pci_p->pci_mutex);
1022 			cmn_err(CE_NOTE, "npe_open: busy");
1023 			return (EBUSY);
1024 		}
1025 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN;
1026 	}
1027 	mutex_exit(&pci_p->pci_mutex);
1028 
1029 	return (0);
1030 }
1031 
1032 static int
1033 npe_close(dev_t dev, int flags, int otyp, cred_t *credp)
1034 {
1035 	minor_t		minor = getminor(dev);
1036 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1037 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1038 	int	rv;
1039 
1040 	if (pci_p == NULL)
1041 		return (ENXIO);
1042 
1043 	mutex_enter(&pci_p->pci_mutex);
1044 
1045 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1046 	case PCI_TOOL_REG_MINOR_NUM:
1047 	case PCI_TOOL_INTR_MINOR_NUM:
1048 		break;
1049 	default:
1050 		/* Handle devctl ioctls */
1051 		rv = pcie_close(pci_p->pci_dip, dev, flags, otyp, credp);
1052 		mutex_exit(&pci_p->pci_mutex);
1053 		return (rv);
1054 	}
1055 
1056 	/* Handle pcitool ioctls */
1057 	pci_p->pci_soft_state = PCI_SOFT_STATE_CLOSED;
1058 	mutex_exit(&pci_p->pci_mutex);
1059 	return (0);
1060 }
1061 
1062 static int
1063 npe_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1064 {
1065 	minor_t		minor = getminor(dev);
1066 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1067 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1068 	int		ret = ENOTTY;
1069 
1070 	if (pci_p == NULL)
1071 		return (ENXIO);
1072 
1073 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1074 	case PCI_TOOL_REG_MINOR_NUM:
1075 	case PCI_TOOL_INTR_MINOR_NUM:
1076 		/* To handle pcitool related ioctls */
1077 		ret =  pci_common_ioctl(pci_p->pci_dip, dev, cmd, arg, mode,
1078 		    credp, rvalp);
1079 		break;
1080 	default:
1081 		/* To handle devctl and hotplug related ioctls */
1082 		ret = pcie_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, credp,
1083 		    rvalp);
1084 		break;
1085 	}
1086 
1087 	return (ret);
1088 }
1089 
1090 /*ARGSUSED*/
1091 static int
1092 npe_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
1093     ddi_iblock_cookie_t *ibc)
1094 {
1095 	pci_state_t  *pcip = ddi_get_soft_state(npe_statep,
1096 	    ddi_get_instance(dip));
1097 
1098 	ASSERT(ibc != NULL);
1099 	*ibc = pcip->pci_fm_ibc;
1100 
1101 	return (pcip->pci_fmcap);
1102 }
1103 
/*
 * FM error callback registered in npe_attach() when the nexus is
 * error-callback capable.  It currently does nothing and always reports
 * DDI_FM_OK.
 */
/*ARGSUSED*/
static int
npe_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *no_used)
{
	/*
	 * On current x86 systems, npe's callback does not get called for failed
	 * loads.  If in the future this feature is used, the fault PA should be
	 * logged in the derr->fme_bus_specific field.  The appropriate PCIe
	 * error handling code should be called and needs to be coordinated with
	 * safe access handling.
	 */

	return (DDI_FM_OK);
}
1118