xref: /titanic_52/usr/src/uts/i86pc/io/pciex/npe.c (revision bfc032a14cc866ab7f34ca6fd86c240a5ebede9d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Host to PCI-Express local bus driver
29  */
30 
31 #include <sys/conf.h>
32 #include <sys/modctl.h>
33 #include <sys/file.h>
34 #include <sys/pci_impl.h>
35 #include <sys/pcie_impl.h>
36 #include <sys/sysmacros.h>
37 #include <sys/ddi_intr.h>
38 #include <sys/sunndi.h>
39 #include <sys/sunddi.h>
40 #include <sys/ddifm.h>
41 #include <sys/ndifm.h>
42 #include <sys/fm/util.h>
43 #include <sys/hotplug/pci/pcie_hp.h>
44 #include <io/pci/pci_tools_ext.h>
45 #include <io/pci/pci_common.h>
46 #include <io/pciex/pcie_nvidia.h>
47 
48 /*
49  * Helper Macros
50  */
/*
 * Evaluates to true when hp is a non-NULL access handle whose
 * ah_platform_private (viewed as a ddi_acc_impl_t) has
 * DDI_ACCATTR_CONFIG_SPACE set and DDI_ACCATTR_CPU_VADDR clear in
 * ahi_acc_attr.  npe_bus_map() uses this to recognize a standard
 * (non memory-mapped) config space mapping at unmap time.
 *
 * Note: hp is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define	NPE_IS_HANDLE_FOR_STDCFG_ACC(hp)				\
	((hp) != NULL &&						\
	((ddi_acc_hdl_t *)(hp))->ah_platform_private != NULL &&	\
	(((ddi_acc_impl_t *)						\
	    ((ddi_acc_hdl_t *)(hp))->ah_platform_private)->ahi_acc_attr & \
	    (DDI_ACCATTR_CPU_VADDR | DDI_ACCATTR_CONFIG_SPACE)) ==	\
	    DDI_ACCATTR_CONFIG_SPACE)
58 
59 /*
60  * Bus Operation functions
61  */
62 static int	npe_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *,
63 		    off_t, off_t, caddr_t *);
64 static int	npe_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
65 		    void *, void *);
66 static int	npe_intr_ops(dev_info_t *, dev_info_t *, ddi_intr_op_t,
67 		    ddi_intr_handle_impl_t *, void *);
68 static int	npe_fm_init(dev_info_t *, dev_info_t *, int,
69 		    ddi_iblock_cookie_t *);
70 
71 static int	npe_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *);
72 
73 /*
74  * Disable URs and Received MA for all PCIe devices.  Until x86 SW is changed so
75  * that random drivers do not do PIO accesses on devices that it does not own,
76  * these error bits must be disabled.  SERR must also be disabled if URs have
77  * been masked.
78  */
79 uint32_t	npe_aer_uce_mask = PCIE_AER_UCE_UR;
80 uint32_t	npe_aer_ce_mask = 0;
81 uint32_t	npe_aer_suce_mask = PCIE_AER_SUCE_RCVD_MA;
82 
83 struct bus_ops npe_bus_ops = {
84 	BUSO_REV,
85 	npe_bus_map,
86 	NULL,
87 	NULL,
88 	NULL,
89 	i_ddi_map_fault,
90 	ddi_dma_map,
91 	ddi_dma_allochdl,
92 	ddi_dma_freehdl,
93 	ddi_dma_bindhdl,
94 	ddi_dma_unbindhdl,
95 	ddi_dma_flush,
96 	ddi_dma_win,
97 	ddi_dma_mctl,
98 	npe_ctlops,
99 	ddi_bus_prop_op,
100 	0,			/* (*bus_get_eventcookie)();	*/
101 	0,			/* (*bus_add_eventcall)();	*/
102 	0,			/* (*bus_remove_eventcall)();	*/
103 	0,			/* (*bus_post_event)();		*/
104 	0,			/* (*bus_intr_ctl)(); */
105 	0,			/* (*bus_config)(); */
106 	0,			/* (*bus_unconfig)(); */
107 	npe_fm_init,		/* (*bus_fm_init)(); */
108 	NULL,			/* (*bus_fm_fini)(); */
109 	NULL,			/* (*bus_fm_access_enter)(); */
110 	NULL,			/* (*bus_fm_access_exit)(); */
111 	NULL,			/* (*bus_power)(); */
112 	npe_intr_ops,		/* (*bus_intr_op)(); */
113 	pcie_hp_common_ops	/* (*bus_hp_op)(); */
114 };
115 
116 static int	npe_open(dev_t *, int, int, cred_t *);
117 static int	npe_close(dev_t, int, int, cred_t *);
118 static int	npe_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
119 
120 struct cb_ops npe_cb_ops = {
121 	npe_open,			/* open */
122 	npe_close,			/* close */
123 	nodev,				/* strategy */
124 	nodev,				/* print */
125 	nodev,				/* dump */
126 	nodev,				/* read */
127 	nodev,				/* write */
128 	npe_ioctl,			/* ioctl */
129 	nodev,				/* devmap */
130 	nodev,				/* mmap */
131 	nodev,				/* segmap */
132 	nochpoll,			/* poll */
133 	pcie_prop_op,			/* cb_prop_op */
134 	NULL,				/* streamtab */
135 	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
136 	CB_REV,				/* rev */
137 	nodev,				/* int (*cb_aread)() */
138 	nodev				/* int (*cb_awrite)() */
139 };
140 
141 
142 /*
143  * Device Node Operation functions
144  */
145 static int	npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
146 static int	npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
147 static int	npe_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
148 
149 struct dev_ops npe_ops = {
150 	DEVO_REV,		/* devo_rev */
151 	0,			/* refcnt  */
152 	npe_info,		/* info */
153 	nulldev,		/* identify */
154 	nulldev,		/* probe */
155 	npe_attach,		/* attach */
156 	npe_detach,		/* detach */
157 	nulldev,		/* reset */
158 	&npe_cb_ops,		/* driver operations */
159 	&npe_bus_ops,		/* bus operations */
160 	NULL,			/* power */
161 	ddi_quiesce_not_needed,		/* quiesce */
162 };
163 
164 /*
165  * Internal routines in support of particular npe_ctlops.
166  */
167 static int npe_removechild(dev_info_t *child);
168 static int npe_initchild(dev_info_t *child);
169 
170 /*
171  * External support routine
172  */
173 extern void	npe_query_acpi_mcfg(dev_info_t *dip);
174 extern void	npe_ck804_fix_aer_ptr(ddi_acc_handle_t cfg_hdl);
175 extern int	npe_disable_empty_bridges_workaround(dev_info_t *child);
176 extern void	npe_nvidia_error_workaround(ddi_acc_handle_t cfg_hdl);
177 extern void	npe_intel_error_workaround(ddi_acc_handle_t cfg_hdl);
178 extern boolean_t npe_is_mmcfg_supported(dev_info_t *dip);
179 extern void	npe_enable_htmsi_children(dev_info_t *dip);
180 extern int	npe_save_htconfig_children(dev_info_t *dip);
181 extern int	npe_restore_htconfig_children(dev_info_t *dip);
182 
183 /*
184  * Module linkage information for the kernel.
185  */
186 static struct modldrv modldrv = {
187 	&mod_driverops,				/* Type of module */
188 	"Host to PCIe nexus driver",		/* Name of module */
189 	&npe_ops,				/* driver ops */
190 };
191 
192 static struct modlinkage modlinkage = {
193 	MODREV_1,
194 	(void *)&modldrv,
195 	NULL
196 };
197 
/*
 * Anchor for the per-instance soft state (one pci_state_t per instance);
 * set up in _init() and torn down in _fini().
 */
void *npe_statep;
200 
201 int
202 _init(void)
203 {
204 	int e;
205 
206 	/*
207 	 * Initialize per-pci bus soft state pointer.
208 	 */
209 	e = ddi_soft_state_init(&npe_statep, sizeof (pci_state_t), 1);
210 	if (e != 0)
211 		return (e);
212 
213 	if ((e = mod_install(&modlinkage)) != 0)
214 		ddi_soft_state_fini(&npe_statep);
215 
216 	return (e);
217 }
218 
219 
220 int
221 _fini(void)
222 {
223 	int rc;
224 
225 	rc = mod_remove(&modlinkage);
226 	if (rc != 0)
227 		return (rc);
228 
229 	ddi_soft_state_fini(&npe_statep);
230 	return (rc);
231 }
232 
233 
234 int
235 _info(struct modinfo *modinfop)
236 {
237 	return (mod_info(&modlinkage, modinfop));
238 }
239 
240 /*ARGSUSED*/
241 static int
242 npe_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
243 {
244 	minor_t		minor = getminor((dev_t)arg);
245 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
246 	pci_state_t	*pcip = ddi_get_soft_state(npe_statep, instance);
247 	int		ret = DDI_SUCCESS;
248 
249 	switch (cmd) {
250 	case DDI_INFO_DEVT2INSTANCE:
251 		*result = (void *)(intptr_t)instance;
252 		break;
253 	case DDI_INFO_DEVT2DEVINFO:
254 		if (pcip == NULL) {
255 			ret = DDI_FAILURE;
256 			break;
257 		}
258 
259 		*result = (void *)pcip->pci_dip;
260 		break;
261 	default:
262 		ret = DDI_FAILURE;
263 		break;
264 	}
265 
266 	return (ret);
267 }
268 
269 /*ARGSUSED*/
270 static int
271 npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
272 {
273 	int		instance = ddi_get_instance(devi);
274 	pci_state_t	*pcip = NULL;
275 
276 	if (cmd == DDI_RESUME) {
277 		/*
278 		 * the system might still be able to resume even if this fails
279 		 */
280 		(void) npe_restore_htconfig_children(devi);
281 		return (DDI_SUCCESS);
282 	}
283 
284 	/*
285 	 * We must do this here in order to ensure that all top level devices
286 	 * get their HyperTransport MSI mapping regs programmed first.
287 	 * "Memory controller" and "hostbridge" class devices are leaf devices
288 	 * that may affect MSI translation functionality for devices
289 	 * connected to the same link/bus.
290 	 *
291 	 * This will also program HT MSI mapping registers on root buses
292 	 * devices (basically sitting on an HT bus) that are not dependent
293 	 * on the aforementioned HT devices for MSI translation.
294 	 */
295 	npe_enable_htmsi_children(devi);
296 
297 	if (ddi_prop_update_string(DDI_DEV_T_NONE, devi, "device_type",
298 	    "pciex") != DDI_PROP_SUCCESS) {
299 		cmn_err(CE_WARN, "npe:  'device_type' prop create failed");
300 	}
301 
302 	if (ddi_soft_state_zalloc(npe_statep, instance) == DDI_SUCCESS)
303 		pcip = ddi_get_soft_state(npe_statep, instance);
304 
305 	if (pcip == NULL)
306 		return (DDI_FAILURE);
307 
308 	pcip->pci_dip = devi;
309 	pcip->pci_soft_state = PCI_SOFT_STATE_CLOSED;
310 
311 	if (pcie_init(devi, NULL) != DDI_SUCCESS)
312 		goto fail1;
313 
314 	/* Second arg: initialize for pci_express root nexus */
315 	if (pcitool_init(devi, B_TRUE) != DDI_SUCCESS)
316 		goto fail2;
317 
318 	pcip->pci_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
319 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
320 	ddi_fm_init(devi, &pcip->pci_fmcap, &pcip->pci_fm_ibc);
321 
322 	if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) {
323 		ddi_fm_handler_register(devi, npe_fm_callback, NULL);
324 	}
325 
326 	PCIE_DIP2PFD(devi) = kmem_zalloc(sizeof (pf_data_t), KM_SLEEP);
327 	pcie_rc_init_pfd(devi, PCIE_DIP2PFD(devi));
328 
329 	npe_query_acpi_mcfg(devi);
330 	ddi_report_dev(devi);
331 	pcie_fab_init_bus(devi, PCIE_BUS_FINAL);
332 
333 	return (DDI_SUCCESS);
334 
335 fail2:
336 	(void) pcie_uninit(devi);
337 fail1:
338 	pcie_rc_fini_bus(devi);
339 	ddi_soft_state_free(npe_statep, instance);
340 
341 	return (DDI_FAILURE);
342 }
343 
344 /*ARGSUSED*/
345 static int
346 npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
347 {
348 	int instance = ddi_get_instance(devi);
349 	pci_state_t *pcip;
350 
351 	pcip = ddi_get_soft_state(npe_statep, ddi_get_instance(devi));
352 
353 	switch (cmd) {
354 	case DDI_DETACH:
355 		pcie_fab_fini_bus(devi, PCIE_BUS_INITIAL);
356 
357 		/* Uninitialize pcitool support. */
358 		pcitool_uninit(devi);
359 
360 		if (pcie_uninit(devi) != DDI_SUCCESS)
361 			return (DDI_FAILURE);
362 
363 		if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE)
364 			ddi_fm_handler_unregister(devi);
365 
366 		pcie_rc_fini_pfd(PCIE_DIP2PFD(devi));
367 		kmem_free(PCIE_DIP2PFD(devi), sizeof (pf_data_t));
368 
369 		ddi_fm_fini(devi);
370 		ddi_soft_state_free(npe_statep, instance);
371 		return (DDI_SUCCESS);
372 
373 	case DDI_SUSPEND:
374 		/*
375 		 * the system might still be able to suspend/resume even if
376 		 * this fails
377 		 */
378 		(void) npe_save_htconfig_children(devi);
379 		return (DDI_SUCCESS);
380 	default:
381 		return (DDI_FAILURE);
382 	}
383 }
384 
385 /*
386  * Configure the access handle for standard configuration space
387  * access (see pci_fm_acc_setup for code that initializes the
388  * access-function pointers).
389  */
390 static int
391 npe_setup_std_pcicfg_acc(dev_info_t *rdip, ddi_map_req_t *mp,
392     ddi_acc_hdl_t *hp, off_t offset, off_t len)
393 {
394 	int ret;
395 
396 	if ((ret = pci_fm_acc_setup(hp, offset, len)) ==
397 	    DDI_SUCCESS) {
398 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
399 		    mp->map_handlep->ah_acc.devacc_attr_access
400 		    != DDI_DEFAULT_ACC) {
401 			ndi_fmc_insert(rdip, ACC_HANDLE,
402 			    (void *)mp->map_handlep, NULL);
403 		}
404 	}
405 	return (ret);
406 }
407 
408 static int
409 npe_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
410     off_t offset, off_t len, caddr_t *vaddrp)
411 {
412 	int 		rnumber;
413 	int		length;
414 	int		space;
415 	ddi_acc_impl_t	*ap;
416 	ddi_acc_hdl_t	*hp;
417 	ddi_map_req_t	mr;
418 	pci_regspec_t	pci_reg;
419 	pci_regspec_t	*pci_rp;
420 	struct regspec	reg;
421 	pci_acc_cfblk_t	*cfp;
422 	int		retval;
423 	int64_t		*ecfginfo;
424 	uint_t		nelem;
425 
426 	mr = *mp; /* Get private copy of request */
427 	mp = &mr;
428 
429 	/*
430 	 * check for register number
431 	 */
432 	switch (mp->map_type) {
433 	case DDI_MT_REGSPEC:
434 		pci_reg = *(pci_regspec_t *)(mp->map_obj.rp);
435 		pci_rp = &pci_reg;
436 		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
437 			return (DDI_FAILURE);
438 		break;
439 	case DDI_MT_RNUMBER:
440 		rnumber = mp->map_obj.rnumber;
441 		/*
442 		 * get ALL "reg" properties for dip, select the one of
443 		 * of interest. In x86, "assigned-addresses" property
444 		 * is identical to the "reg" property, so there is no
445 		 * need to cross check the two to determine the physical
446 		 * address of the registers.
447 		 * This routine still performs some validity checks to
448 		 * make sure that everything is okay.
449 		 */
450 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
451 		    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp,
452 		    (uint_t *)&length) != DDI_PROP_SUCCESS)
453 			return (DDI_FAILURE);
454 
455 		/*
456 		 * validate the register number.
457 		 */
458 		length /= (sizeof (pci_regspec_t) / sizeof (int));
459 		if (rnumber >= length) {
460 			ddi_prop_free(pci_rp);
461 			return (DDI_FAILURE);
462 		}
463 
464 		/*
465 		 * copy the required entry.
466 		 */
467 		pci_reg = pci_rp[rnumber];
468 
469 		/*
470 		 * free the memory allocated by ddi_prop_lookup_int_array
471 		 */
472 		ddi_prop_free(pci_rp);
473 
474 		pci_rp = &pci_reg;
475 		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
476 			return (DDI_FAILURE);
477 		mp->map_type = DDI_MT_REGSPEC;
478 		break;
479 	default:
480 		return (DDI_ME_INVAL);
481 	}
482 
483 	space = pci_rp->pci_phys_hi & PCI_REG_ADDR_M;
484 
485 	/*
486 	 * check for unmap and unlock of address space
487 	 */
488 	if ((mp->map_op == DDI_MO_UNMAP) || (mp->map_op == DDI_MO_UNLOCK)) {
489 		switch (space) {
490 		case PCI_ADDR_IO:
491 			reg.regspec_bustype = 1;
492 			break;
493 
494 		case PCI_ADDR_CONFIG:
495 			/*
496 			 * If this is an unmap/unlock of a standard config
497 			 * space mapping (memory-mapped config space mappings
498 			 * would have the DDI_ACCATTR_CPU_VADDR bit set in the
499 			 * acc_attr), undo that setup here.
500 			 */
501 			if (NPE_IS_HANDLE_FOR_STDCFG_ACC(mp->map_handlep)) {
502 
503 				if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
504 				    mp->map_handlep->ah_acc.devacc_attr_access
505 				    != DDI_DEFAULT_ACC) {
506 					ndi_fmc_remove(rdip, ACC_HANDLE,
507 					    (void *)mp->map_handlep);
508 				}
509 				return (DDI_SUCCESS);
510 			}
511 
512 			pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
513 
514 			/* FALLTHROUGH */
515 		case PCI_ADDR_MEM64:
516 			/*
517 			 * MEM64 requires special treatment on map, to check
518 			 * that the device is below 4G.  On unmap, however,
519 			 * we can assume that everything is OK... the map
520 			 * must have succeeded.
521 			 */
522 			/* FALLTHROUGH */
523 		case PCI_ADDR_MEM32:
524 			reg.regspec_bustype = 0;
525 			break;
526 
527 		default:
528 			return (DDI_FAILURE);
529 		}
530 
531 		/*
532 		 * Adjust offset and length
533 		 * A non-zero length means override the one in the regspec.
534 		 */
535 		pci_rp->pci_phys_low += (uint_t)offset;
536 		if (len != 0)
537 			pci_rp->pci_size_low = len;
538 
539 		reg.regspec_addr = pci_rp->pci_phys_low;
540 		reg.regspec_size = pci_rp->pci_size_low;
541 
542 		mp->map_obj.rp = &reg;
543 		retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
544 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
545 		    mp->map_handlep->ah_acc.devacc_attr_access !=
546 		    DDI_DEFAULT_ACC) {
547 			ndi_fmc_remove(rdip, ACC_HANDLE,
548 			    (void *)mp->map_handlep);
549 		}
550 		return (retval);
551 
552 	}
553 
554 	/* check for user mapping request - not legal for Config */
555 	if (mp->map_op == DDI_MO_MAP_HANDLE && space == PCI_ADDR_CONFIG) {
556 		cmn_err(CE_NOTE, "npe: Config mapping request from user\n");
557 		return (DDI_FAILURE);
558 	}
559 
560 
561 	/*
562 	 * Note that pci_fm_acc_setup() is called to serve two purposes
563 	 * i) enable legacy PCI I/O style config space access
564 	 * ii) register with FMA
565 	 */
566 	if (space == PCI_ADDR_CONFIG) {
567 
568 		/* Can't map config space without a handle */
569 		hp = (ddi_acc_hdl_t *)mp->map_handlep;
570 		if (hp == NULL)
571 			return (DDI_FAILURE);
572 
573 		/* record the device address for future reference */
574 		cfp = (pci_acc_cfblk_t *)&hp->ah_bus_private;
575 		cfp->c_busnum = PCI_REG_BUS_G(pci_rp->pci_phys_hi);
576 		cfp->c_devnum = PCI_REG_DEV_G(pci_rp->pci_phys_hi);
577 		cfp->c_funcnum = PCI_REG_FUNC_G(pci_rp->pci_phys_hi);
578 
579 		*vaddrp = (caddr_t)offset;
580 
581 		/* Check if MMCFG is supported */
582 		if (!npe_is_mmcfg_supported(rdip)) {
583 			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
584 			    offset, len));
585 		}
586 
587 
588 		if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, rdip, 0,
589 		    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {
590 
591 			if (nelem != 4 ||
592 			    cfp->c_busnum < ecfginfo[2] ||
593 			    cfp->c_busnum > ecfginfo[3]) {
594 				/*
595 				 * Invalid property or Doesn't contain the
596 				 * requested bus; fall back to standard
597 				 * (I/O-based) config access.
598 				 */
599 				ddi_prop_free(ecfginfo);
600 				return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
601 				    offset, len));
602 			} else {
603 				pci_rp->pci_phys_low = ecfginfo[0];
604 
605 				ddi_prop_free(ecfginfo);
606 
607 				pci_rp->pci_phys_low += ((cfp->c_busnum << 20) |
608 				    (cfp->c_devnum) << 15 |
609 				    (cfp->c_funcnum << 12));
610 
611 				pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
612 			}
613 		} else {
614 			/*
615 			 * Couldn't find the MMCFG property -- fall back to
616 			 * standard config access
617 			 */
618 			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
619 			    offset, len));
620 		}
621 	}
622 
623 	length = pci_rp->pci_size_low;
624 
625 	/*
626 	 * range check
627 	 */
628 	if ((offset >= length) || (len > length) || (offset + len > length))
629 		return (DDI_FAILURE);
630 
631 	/*
632 	 * Adjust offset and length
633 	 * A non-zero length means override the one in the regspec.
634 	 */
635 	pci_rp->pci_phys_low += (uint_t)offset;
636 	if (len != 0)
637 		pci_rp->pci_size_low = len;
638 
639 	/*
640 	 * convert the pci regsec into the generic regspec used by the
641 	 * parent root nexus driver.
642 	 */
643 	switch (space) {
644 	case PCI_ADDR_IO:
645 		reg.regspec_bustype = 1;
646 		break;
647 	case PCI_ADDR_CONFIG:
648 	case PCI_ADDR_MEM64:
649 		/*
650 		 * We can't handle 64-bit devices that are mapped above
651 		 * 4G or that are larger than 4G.
652 		 */
653 		if (pci_rp->pci_phys_mid != 0 || pci_rp->pci_size_hi != 0)
654 			return (DDI_FAILURE);
655 		/*
656 		 * Other than that, we can treat them as 32-bit mappings
657 		 */
658 		/* FALLTHROUGH */
659 	case PCI_ADDR_MEM32:
660 		reg.regspec_bustype = 0;
661 		break;
662 	default:
663 		return (DDI_FAILURE);
664 	}
665 
666 	reg.regspec_addr = pci_rp->pci_phys_low;
667 	reg.regspec_size = pci_rp->pci_size_low;
668 
669 	mp->map_obj.rp = &reg;
670 	retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
671 	if (retval == DDI_SUCCESS) {
672 		/*
673 		 * For config space gets force use of cautious access routines.
674 		 * These will handle default and protected mode accesses too.
675 		 */
676 		if (space == PCI_ADDR_CONFIG) {
677 			ap = (ddi_acc_impl_t *)mp->map_handlep;
678 			ap->ahi_acc_attr &= ~DDI_ACCATTR_DIRECT;
679 			ap->ahi_acc_attr |= DDI_ACCATTR_CONFIG_SPACE;
680 			ap->ahi_get8 = i_ddi_caut_get8;
681 			ap->ahi_get16 = i_ddi_caut_get16;
682 			ap->ahi_get32 = i_ddi_caut_get32;
683 			ap->ahi_get64 = i_ddi_caut_get64;
684 			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
685 			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
686 			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
687 			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
688 		}
689 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
690 		    mp->map_handlep->ah_acc.devacc_attr_access !=
691 		    DDI_DEFAULT_ACC) {
692 			ndi_fmc_insert(rdip, ACC_HANDLE,
693 			    (void *)mp->map_handlep, NULL);
694 		}
695 	}
696 	return (retval);
697 }
698 
699 
700 
701 /*ARGSUSED*/
702 static int
703 npe_ctlops(dev_info_t *dip, dev_info_t *rdip,
704 	ddi_ctl_enum_t ctlop, void *arg, void *result)
705 {
706 	int		rn;
707 	int		totreg;
708 	uint_t		reglen;
709 	pci_regspec_t	*drv_regp;
710 	struct attachspec *asp;
711 	struct detachspec *dsp;
712 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep,
713 	    ddi_get_instance(dip));
714 
715 	switch (ctlop) {
716 	case DDI_CTLOPS_REPORTDEV:
717 		if (rdip == (dev_info_t *)0)
718 			return (DDI_FAILURE);
719 		cmn_err(CE_CONT, "?PCI Express-device: %s@%s, %s%d\n",
720 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
721 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
722 		return (DDI_SUCCESS);
723 
724 	case DDI_CTLOPS_INITCHILD:
725 		return (npe_initchild((dev_info_t *)arg));
726 
727 	case DDI_CTLOPS_UNINITCHILD:
728 		return (npe_removechild((dev_info_t *)arg));
729 
730 	case DDI_CTLOPS_SIDDEV:
731 		return (DDI_SUCCESS);
732 
733 	case DDI_CTLOPS_REGSIZE:
734 	case DDI_CTLOPS_NREGS:
735 		if (rdip == (dev_info_t *)0)
736 			return (DDI_FAILURE);
737 
738 		*(int *)result = 0;
739 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
740 		    DDI_PROP_DONTPASS, "reg", (int **)&drv_regp,
741 		    &reglen) != DDI_PROP_SUCCESS) {
742 			return (DDI_FAILURE);
743 		}
744 
745 		totreg = (reglen * sizeof (int)) / sizeof (pci_regspec_t);
746 		if (ctlop == DDI_CTLOPS_NREGS)
747 			*(int *)result = totreg;
748 		else if (ctlop == DDI_CTLOPS_REGSIZE) {
749 			rn = *(int *)arg;
750 			if (rn >= totreg) {
751 				ddi_prop_free(drv_regp);
752 				return (DDI_FAILURE);
753 			}
754 			*(off_t *)result = drv_regp[rn].pci_size_low;
755 		}
756 		ddi_prop_free(drv_regp);
757 
758 		return (DDI_SUCCESS);
759 
760 	case DDI_CTLOPS_POWER:
761 	{
762 		power_req_t	*reqp = (power_req_t *)arg;
763 		/*
764 		 * We currently understand reporting of PCI_PM_IDLESPEED
765 		 * capability. Everything else is passed up.
766 		 */
767 		if ((reqp->request_type == PMR_REPORT_PMCAP) &&
768 		    (reqp->req.report_pmcap_req.cap ==  PCI_PM_IDLESPEED))
769 			return (DDI_SUCCESS);
770 
771 		break;
772 	}
773 
774 	case DDI_CTLOPS_PEEK:
775 	case DDI_CTLOPS_POKE:
776 		return (pci_common_peekpoke(dip, rdip, ctlop, arg, result));
777 
778 	/* X86 systems support PME wakeup from suspended state */
779 	case DDI_CTLOPS_ATTACH:
780 		if (!pcie_is_child(dip, rdip))
781 			return (DDI_SUCCESS);
782 
783 		asp = (struct attachspec *)arg;
784 		if ((asp->when == DDI_POST) && (asp->result == DDI_SUCCESS)) {
785 			pf_init(rdip, (void *)pci_p->pci_fm_ibc, asp->cmd);
786 			(void) pcie_postattach_child(rdip);
787 		}
788 
789 		/* only do this for immediate children */
790 		if (asp->cmd == DDI_RESUME && asp->when == DDI_PRE &&
791 		    ddi_get_parent(rdip) == dip)
792 			if (pci_pre_resume(rdip) != DDI_SUCCESS) {
793 				/* Not good, better stop now. */
794 				cmn_err(CE_PANIC,
795 				    "Couldn't pre-resume device %p",
796 				    (void *) dip);
797 				/* NOTREACHED */
798 			}
799 
800 		return (DDI_SUCCESS);
801 
802 	case DDI_CTLOPS_DETACH:
803 		if (!pcie_is_child(dip, rdip))
804 			return (DDI_SUCCESS);
805 
806 		dsp = (struct detachspec *)arg;
807 
808 		if (dsp->when == DDI_PRE)
809 			pf_fini(rdip, dsp->cmd);
810 
811 		/* only do this for immediate children */
812 		if (dsp->cmd == DDI_SUSPEND && dsp->when == DDI_POST &&
813 		    ddi_get_parent(rdip) == dip)
814 			if (pci_post_suspend(rdip) != DDI_SUCCESS)
815 				return (DDI_FAILURE);
816 
817 		return (DDI_SUCCESS);
818 
819 	default:
820 		break;
821 	}
822 
823 	return (ddi_ctlops(dip, rdip, ctlop, arg, result));
824 
825 }
826 
827 
828 /*
829  * npe_intr_ops
830  */
831 static int
832 npe_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
833     ddi_intr_handle_impl_t *hdlp, void *result)
834 {
835 	return (pci_common_intr_ops(pdip, rdip, intr_op, hdlp, result));
836 }
837 
838 
839 static int
840 npe_initchild(dev_info_t *child)
841 {
842 	char		name[80];
843 	pcie_bus_t	*bus_p;
844 	uint32_t	regs;
845 	ddi_acc_handle_t	cfg_hdl;
846 
847 	/*
848 	 * Do not bind drivers to empty bridges.
849 	 * Fail above, if the bridge is found to be hotplug capable
850 	 */
851 	if (npe_disable_empty_bridges_workaround(child) == 1)
852 		return (DDI_FAILURE);
853 
854 	if (pci_common_name_child(child, name, 80) != DDI_SUCCESS)
855 		return (DDI_FAILURE);
856 
857 	ddi_set_name_addr(child, name);
858 
859 	/*
860 	 * Pseudo nodes indicate a prototype node with per-instance
861 	 * properties to be merged into the real h/w device node.
862 	 * The interpretation of the unit-address is DD[,F]
863 	 * where DD is the device id and F is the function.
864 	 */
865 	if (ndi_dev_is_persistent_node(child) == 0) {
866 		extern int pci_allow_pseudo_children;
867 
868 		ddi_set_parent_data(child, NULL);
869 
870 		/*
871 		 * Try to merge the properties from this prototype
872 		 * node into real h/w nodes.
873 		 */
874 		if (ndi_merge_node(child, pci_common_name_child) ==
875 		    DDI_SUCCESS) {
876 			/*
877 			 * Merged ok - return failure to remove the node.
878 			 */
879 			ddi_set_name_addr(child, NULL);
880 			return (DDI_FAILURE);
881 		}
882 
883 		/* workaround for DDIVS to run under PCI Express */
884 		if (pci_allow_pseudo_children) {
885 			/*
886 			 * If the "interrupts" property doesn't exist,
887 			 * this must be the ddivs no-intr case, and it returns
888 			 * DDI_SUCCESS instead of DDI_FAILURE.
889 			 */
890 			if (ddi_prop_get_int(DDI_DEV_T_ANY, child,
891 			    DDI_PROP_DONTPASS, "interrupts", -1) == -1)
892 				return (DDI_SUCCESS);
893 			/*
894 			 * Create the ddi_parent_private_data for a pseudo
895 			 * child.
896 			 */
897 			pci_common_set_parent_private_data(child);
898 			return (DDI_SUCCESS);
899 		}
900 
901 		/*
902 		 * The child was not merged into a h/w node,
903 		 * but there's not much we can do with it other
904 		 * than return failure to cause the node to be removed.
905 		 */
906 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
907 		    ddi_get_name(child), ddi_get_name_addr(child),
908 		    ddi_get_name(child));
909 		ddi_set_name_addr(child, NULL);
910 		return (DDI_NOT_WELL_FORMED);
911 	}
912 
913 	if (ddi_prop_get_int(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
914 	    "interrupts", -1) != -1)
915 		pci_common_set_parent_private_data(child);
916 	else
917 		ddi_set_parent_data(child, NULL);
918 
919 	/* Disable certain errors on PCIe drivers for x86 platforms */
920 	regs = pcie_get_aer_uce_mask() | npe_aer_uce_mask;
921 	pcie_set_aer_uce_mask(regs);
922 	regs = pcie_get_aer_ce_mask() | npe_aer_ce_mask;
923 	pcie_set_aer_ce_mask(regs);
924 	regs = pcie_get_aer_suce_mask() | npe_aer_suce_mask;
925 	pcie_set_aer_suce_mask(regs);
926 
927 	/*
928 	 * If URs are disabled, mask SERRs as well, otherwise the system will
929 	 * still be notified of URs
930 	 */
931 	if (npe_aer_uce_mask & PCIE_AER_UCE_UR)
932 		pcie_set_serr_mask(1);
933 
934 	if (pci_config_setup(child, &cfg_hdl) == DDI_SUCCESS) {
935 		npe_ck804_fix_aer_ptr(cfg_hdl);
936 		npe_nvidia_error_workaround(cfg_hdl);
937 		npe_intel_error_workaround(cfg_hdl);
938 		pci_config_teardown(&cfg_hdl);
939 	}
940 
941 	bus_p = PCIE_DIP2BUS(child);
942 	if (bus_p) {
943 		uint16_t device_id = (uint16_t)(bus_p->bus_dev_ven_id >> 16);
944 		uint16_t vendor_id = (uint16_t)(bus_p->bus_dev_ven_id & 0xFFFF);
945 		uint16_t rev_id = bus_p->bus_rev_id;
946 
947 		/* Disable AER for certain NVIDIA Chipsets */
948 		if ((vendor_id == NVIDIA_VENDOR_ID) &&
949 		    (device_id == NVIDIA_CK804_DEVICE_ID) &&
950 		    (rev_id < NVIDIA_CK804_AER_VALID_REVID))
951 			bus_p->bus_aer_off = 0;
952 
953 		pcie_init_dom(child);
954 		(void) pcie_initchild(child);
955 	}
956 
957 	return (DDI_SUCCESS);
958 }
959 
960 
961 static int
962 npe_removechild(dev_info_t *dip)
963 {
964 	pcie_uninitchild(dip);
965 
966 	ddi_set_name_addr(dip, NULL);
967 
968 	/*
969 	 * Strip the node to properly convert it back to prototype form
970 	 */
971 	ddi_remove_minor_node(dip, NULL);
972 
973 	ddi_prop_remove_all(dip);
974 
975 	return (DDI_SUCCESS);
976 }
977 
978 static int
979 npe_open(dev_t *devp, int flags, int otyp, cred_t *credp)
980 {
981 	minor_t		minor = getminor(*devp);
982 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
983 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
984 	int	rv;
985 
986 	/*
987 	 * Make sure the open is for the right file type.
988 	 */
989 	if (otyp != OTYP_CHR)
990 		return (EINVAL);
991 
992 	if (pci_p == NULL)
993 		return (ENXIO);
994 
995 	mutex_enter(&pci_p->pci_mutex);
996 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
997 	case PCI_TOOL_REG_MINOR_NUM:
998 	case PCI_TOOL_INTR_MINOR_NUM:
999 		break;
1000 	default:
1001 		/* Handle devctl ioctls */
1002 		rv = pcie_open(pci_p->pci_dip, devp, flags, otyp, credp);
1003 		mutex_exit(&pci_p->pci_mutex);
1004 		return (rv);
1005 	}
1006 
1007 	/* Handle pcitool ioctls */
1008 	if (flags & FEXCL) {
1009 		if (pci_p->pci_soft_state != PCI_SOFT_STATE_CLOSED) {
1010 			mutex_exit(&pci_p->pci_mutex);
1011 			cmn_err(CE_NOTE, "npe_open: busy");
1012 			return (EBUSY);
1013 		}
1014 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
1015 	} else {
1016 		if (pci_p->pci_soft_state == PCI_SOFT_STATE_OPEN_EXCL) {
1017 			mutex_exit(&pci_p->pci_mutex);
1018 			cmn_err(CE_NOTE, "npe_open: busy");
1019 			return (EBUSY);
1020 		}
1021 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN;
1022 	}
1023 	mutex_exit(&pci_p->pci_mutex);
1024 
1025 	return (0);
1026 }
1027 
1028 static int
1029 npe_close(dev_t dev, int flags, int otyp, cred_t *credp)
1030 {
1031 	minor_t		minor = getminor(dev);
1032 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1033 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1034 	int	rv;
1035 
1036 	if (pci_p == NULL)
1037 		return (ENXIO);
1038 
1039 	mutex_enter(&pci_p->pci_mutex);
1040 
1041 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1042 	case PCI_TOOL_REG_MINOR_NUM:
1043 	case PCI_TOOL_INTR_MINOR_NUM:
1044 		break;
1045 	default:
1046 		/* Handle devctl ioctls */
1047 		rv = pcie_close(pci_p->pci_dip, dev, flags, otyp, credp);
1048 		mutex_exit(&pci_p->pci_mutex);
1049 		return (rv);
1050 	}
1051 
1052 	/* Handle pcitool ioctls */
1053 	pci_p->pci_soft_state = PCI_SOFT_STATE_CLOSED;
1054 	mutex_exit(&pci_p->pci_mutex);
1055 	return (0);
1056 }
1057 
1058 static int
1059 npe_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1060 {
1061 	minor_t		minor = getminor(dev);
1062 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1063 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1064 	int		ret = ENOTTY;
1065 
1066 	if (pci_p == NULL)
1067 		return (ENXIO);
1068 
1069 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1070 	case PCI_TOOL_REG_MINOR_NUM:
1071 	case PCI_TOOL_INTR_MINOR_NUM:
1072 		/* To handle pcitool related ioctls */
1073 		ret =  pci_common_ioctl(pci_p->pci_dip, dev, cmd, arg, mode,
1074 		    credp, rvalp);
1075 		break;
1076 	default:
1077 		/* To handle devctl and hotplug related ioctls */
1078 		ret = pcie_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, credp,
1079 		    rvalp);
1080 		break;
1081 	}
1082 
1083 	return (ret);
1084 }
1085 
1086 /*ARGSUSED*/
1087 static int
1088 npe_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
1089     ddi_iblock_cookie_t *ibc)
1090 {
1091 	pci_state_t  *pcip = ddi_get_soft_state(npe_statep,
1092 	    ddi_get_instance(dip));
1093 
1094 	ASSERT(ibc != NULL);
1095 	*ibc = pcip->pci_fm_ibc;
1096 
1097 	return (pcip->pci_fmcap);
1098 }
1099 
1100 /*ARGSUSED*/
1101 static int
1102 npe_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *no_used)
1103 {
1104 	/*
1105 	 * On current x86 systems, npe's callback does not get called for failed
1106 	 * loads.  If in the future this feature is used, the fault PA should be
1107 	 * logged in the derr->fme_bus_specific field.  The appropriate PCIe
1108 	 * error handling code should be called and needs to be coordinated with
1109 	 * safe access handling.
1110 	 */
1111 
1112 	return (DDI_FM_OK);
1113 }
1114