xref: /illumos-gate/usr/src/uts/i86pc/io/pciex/npe.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
29  * Copyright 2016 Joyent, Inc.
30  */
31 
32 /*
33  *	Host to PCI-Express local bus driver
34  */
35 
36 #include <sys/conf.h>
37 #include <sys/modctl.h>
38 #include <sys/file.h>
39 #include <sys/pci_impl.h>
40 #include <sys/pcie_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ddi_intr.h>
43 #include <sys/sunndi.h>
44 #include <sys/sunddi.h>
45 #include <sys/ddifm.h>
46 #include <sys/ndifm.h>
47 #include <sys/fm/util.h>
48 #include <sys/hotplug/pci/pcie_hp.h>
49 #include <io/pci/pci_tools_ext.h>
50 #include <io/pci/pci_common.h>
51 #include <io/pciex/pcie_nvidia.h>
52 
/*
 * Helper Macros
 *
 * NPE_IS_HANDLE_FOR_STDCFG_ACC(hp) evaluates to true when hp is a non-NULL
 * access handle whose ah_platform_private (a ddi_acc_impl_t) is non-NULL and
 * whose ahi_acc_attr has DDI_ACCATTR_CONFIG_SPACE set while
 * DDI_ACCATTR_CPU_VADDR is clear.  npe_bus_map() uses it on unmap/unlock to
 * recognize standard (non memory-mapped) config space handles.
 */
#define	NPE_IS_HANDLE_FOR_STDCFG_ACC(hp) \
	((hp) != NULL &&						\
	((ddi_acc_hdl_t *)(hp))->ah_platform_private != NULL &&		\
	(((ddi_acc_impl_t *)((ddi_acc_hdl_t *)(hp))->			\
	ah_platform_private)->						\
	    ahi_acc_attr &(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_CONFIG_SPACE)) \
		== DDI_ACCATTR_CONFIG_SPACE)
63 
64 /*
65  * Bus Operation functions
66  */
67 static int	npe_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *,
68 		    off_t, off_t, caddr_t *);
69 static int	npe_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
70 		    void *, void *);
71 static int	npe_intr_ops(dev_info_t *, dev_info_t *, ddi_intr_op_t,
72 		    ddi_intr_handle_impl_t *, void *);
73 static int	npe_fm_init(dev_info_t *, dev_info_t *, int,
74 		    ddi_iblock_cookie_t *);
75 
76 static int	npe_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *);
77 
/*
 * Disable URs and Received MA for all PCIe devices.  Until x86 SW is changed
 * so that random drivers do not do PIO accesses on devices that they do not
 * own, these error bits must be disabled.  SERR must also be disabled if URs
 * have been masked.
 *
 * These values are OR'ed into the current pcie AER masks each time
 * npe_initchild() runs; npe_initchild() also masks SERR when
 * PCIE_AER_UCE_UR is set in npe_aer_uce_mask.
 */
uint32_t	npe_aer_uce_mask = PCIE_AER_UCE_UR;
uint32_t	npe_aer_ce_mask = 0;
uint32_t	npe_aer_suce_mask = PCIE_AER_SUCE_RCVD_MA;
87 
/*
 * Bus operations vector, referenced from npe_ops below.  npe supplies its own
 * map (npe_bus_map), ctl (npe_ctlops), FM init (npe_fm_init) and interrupt
 * op (npe_intr_ops) entry points; the DMA slots use the generic ddi_dma_*
 * routines and the hotplug slot uses pcie_hp_common_ops.  The initializer is
 * positional, so the order of the entries must not be changed.
 */
struct bus_ops npe_bus_ops = {
	BUSO_REV,
	npe_bus_map,
	NULL,
	NULL,
	NULL,
	i_ddi_map_fault,
	NULL,
	ddi_dma_allochdl,
	ddi_dma_freehdl,
	ddi_dma_bindhdl,
	ddi_dma_unbindhdl,
	ddi_dma_flush,
	ddi_dma_win,
	ddi_dma_mctl,
	npe_ctlops,
	ddi_bus_prop_op,
	0,			/* (*bus_get_eventcookie)();	*/
	0,			/* (*bus_add_eventcall)();	*/
	0,			/* (*bus_remove_eventcall)();	*/
	0,			/* (*bus_post_event)();		*/
	0,			/* (*bus_intr_ctl)(); */
	0,			/* (*bus_config)(); */
	0,			/* (*bus_unconfig)(); */
	npe_fm_init,		/* (*bus_fm_init)(); */
	NULL,			/* (*bus_fm_fini)(); */
	NULL,			/* (*bus_fm_access_enter)(); */
	NULL,			/* (*bus_fm_access_exit)(); */
	NULL,			/* (*bus_power)(); */
	npe_intr_ops,		/* (*bus_intr_op)(); */
	pcie_hp_common_ops	/* (*bus_hp_op)(); */
};
120 
/* Character device entry points installed in npe_cb_ops. */
static int	npe_open(dev_t *, int, int, cred_t *);
static int	npe_close(dev_t, int, int, cred_t *);
static int	npe_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Character/block operations vector, referenced from npe_ops below.  Only
 * open, close, ioctl and prop_op are implemented; every other data-path slot
 * is nodev (nochpoll for poll).  The initializer is positional.
 */
struct cb_ops npe_cb_ops = {
	npe_open,			/* open */
	npe_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	npe_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	pcie_prop_op,			/* cb_prop_op */
	NULL,				/* streamtab */
	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
145 
146 
147 /*
148  * Device Node Operation functions
149  */
static int	npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int	npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int	npe_info(dev_info_t *, ddi_info_cmd_t, void *, void **);

/*
 * Device operations vector handed to the module linkage (see modldrv).  It
 * ties together the info/attach/detach entry points with the cb_ops and
 * bus_ops tables defined above.  The initializer is positional.
 */
struct dev_ops npe_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt  */
	npe_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	npe_attach,		/* attach */
	npe_detach,		/* detach */
	nulldev,		/* reset */
	&npe_cb_ops,		/* driver operations */
	&npe_bus_ops,		/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
168 
/*
 * Internal routines in support of particular npe_ctlops:
 * npe_initchild() serves DDI_CTLOPS_INITCHILD and npe_removechild() serves
 * DDI_CTLOPS_UNINITCHILD.
 */
static int npe_removechild(dev_info_t *child);
static int npe_initchild(dev_info_t *child);

/*
 * External support routines (declared here, not defined in this file)
 */
extern void	npe_query_acpi_mcfg(dev_info_t *dip);
extern void	npe_ck804_fix_aer_ptr(ddi_acc_handle_t cfg_hdl);
extern int	npe_disable_empty_bridges_workaround(dev_info_t *child);
extern void	npe_nvidia_error_workaround(ddi_acc_handle_t cfg_hdl);
extern void	npe_intel_error_workaround(ddi_acc_handle_t cfg_hdl);
extern boolean_t npe_is_mmcfg_supported(dev_info_t *dip);
extern void	npe_enable_htmsi_children(dev_info_t *dip);
extern int	npe_save_htconfig_children(dev_info_t *dip);
extern int	npe_restore_htconfig_children(dev_info_t *dip);
187 
188 /*
189  * Module linkage information for the kernel.
190  */
/* Driver-type module description pointing at npe_ops. */
static struct modldrv modldrv = {
	&mod_driverops,				/* Type of module */
	"Host to PCIe nexus driver",		/* Name of module */
	&npe_ops,				/* driver ops */
};

/* Linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Save minimal state.
 * Soft state anchor: one pci_state_t per instance, set up in _init() and
 * torn down in _fini().
 */
void *npe_statep;
205 
206 int
207 _init(void)
208 {
209 	int e;
210 
211 	/*
212 	 * Initialize per-pci bus soft state pointer.
213 	 */
214 	e = ddi_soft_state_init(&npe_statep, sizeof (pci_state_t), 1);
215 	if (e != 0)
216 		return (e);
217 
218 	if ((e = mod_install(&modlinkage)) != 0)
219 		ddi_soft_state_fini(&npe_statep);
220 
221 	return (e);
222 }
223 
224 
225 int
226 _fini(void)
227 {
228 	int rc;
229 
230 	rc = mod_remove(&modlinkage);
231 	if (rc != 0)
232 		return (rc);
233 
234 	ddi_soft_state_fini(&npe_statep);
235 	return (rc);
236 }
237 
238 
239 int
240 _info(struct modinfo *modinfop)
241 {
242 	return (mod_info(&modlinkage, modinfop));
243 }
244 
245 /*ARGSUSED*/
246 static int
247 npe_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
248 {
249 	minor_t		minor = getminor((dev_t)arg);
250 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
251 	pci_state_t	*pcip = ddi_get_soft_state(npe_statep, instance);
252 	int		ret = DDI_SUCCESS;
253 
254 	switch (cmd) {
255 	case DDI_INFO_DEVT2INSTANCE:
256 		*result = (void *)(intptr_t)instance;
257 		break;
258 	case DDI_INFO_DEVT2DEVINFO:
259 		if (pcip == NULL) {
260 			ret = DDI_FAILURE;
261 			break;
262 		}
263 
264 		*result = (void *)pcip->pci_dip;
265 		break;
266 	default:
267 		ret = DDI_FAILURE;
268 		break;
269 	}
270 
271 	return (ret);
272 }
273 
274 /*ARGSUSED*/
275 static int
276 npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
277 {
278 	int		instance = ddi_get_instance(devi);
279 	pci_state_t	*pcip = NULL;
280 
281 	if (cmd == DDI_RESUME) {
282 		/*
283 		 * the system might still be able to resume even if this fails
284 		 */
285 		(void) npe_restore_htconfig_children(devi);
286 		return (DDI_SUCCESS);
287 	}
288 
289 	/*
290 	 * We must do this here in order to ensure that all top level devices
291 	 * get their HyperTransport MSI mapping regs programmed first.
292 	 * "Memory controller" and "hostbridge" class devices are leaf devices
293 	 * that may affect MSI translation functionality for devices
294 	 * connected to the same link/bus.
295 	 *
296 	 * This will also program HT MSI mapping registers on root buses
297 	 * devices (basically sitting on an HT bus) that are not dependent
298 	 * on the aforementioned HT devices for MSI translation.
299 	 */
300 	npe_enable_htmsi_children(devi);
301 
302 	if (ddi_prop_update_string(DDI_DEV_T_NONE, devi, "device_type",
303 	    "pciex") != DDI_PROP_SUCCESS) {
304 		cmn_err(CE_WARN, "npe:  'device_type' prop create failed");
305 	}
306 
307 	if (ddi_soft_state_zalloc(npe_statep, instance) == DDI_SUCCESS)
308 		pcip = ddi_get_soft_state(npe_statep, instance);
309 
310 	if (pcip == NULL)
311 		return (DDI_FAILURE);
312 
313 	pcip->pci_dip = devi;
314 	pcip->pci_soft_state = PCI_SOFT_STATE_CLOSED;
315 
316 	if (pcie_init(devi, NULL) != DDI_SUCCESS)
317 		goto fail1;
318 
319 	/* Second arg: initialize for pci_express root nexus */
320 	if (pcitool_init(devi, B_TRUE) != DDI_SUCCESS)
321 		goto fail2;
322 
323 	pcip->pci_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
324 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
325 	ddi_fm_init(devi, &pcip->pci_fmcap, &pcip->pci_fm_ibc);
326 
327 	if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) {
328 		ddi_fm_handler_register(devi, npe_fm_callback, NULL);
329 	}
330 
331 	PCIE_DIP2PFD(devi) = kmem_zalloc(sizeof (pf_data_t), KM_SLEEP);
332 	pcie_rc_init_pfd(devi, PCIE_DIP2PFD(devi));
333 
334 	npe_query_acpi_mcfg(devi);
335 	ddi_report_dev(devi);
336 	pcie_fab_init_bus(devi, PCIE_BUS_FINAL);
337 
338 	return (DDI_SUCCESS);
339 
340 fail2:
341 	(void) pcie_uninit(devi);
342 fail1:
343 	pcie_rc_fini_bus(devi);
344 	ddi_soft_state_free(npe_statep, instance);
345 
346 	return (DDI_FAILURE);
347 }
348 
349 /*ARGSUSED*/
350 static int
351 npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
352 {
353 	int instance = ddi_get_instance(devi);
354 	pci_state_t *pcip;
355 
356 	pcip = ddi_get_soft_state(npe_statep, ddi_get_instance(devi));
357 
358 	switch (cmd) {
359 	case DDI_DETACH:
360 		pcie_fab_fini_bus(devi, PCIE_BUS_INITIAL);
361 
362 		/* Uninitialize pcitool support. */
363 		pcitool_uninit(devi);
364 
365 		if (pcie_uninit(devi) != DDI_SUCCESS)
366 			return (DDI_FAILURE);
367 
368 		if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE)
369 			ddi_fm_handler_unregister(devi);
370 
371 		pcie_rc_fini_pfd(PCIE_DIP2PFD(devi));
372 		kmem_free(PCIE_DIP2PFD(devi), sizeof (pf_data_t));
373 
374 		ddi_fm_fini(devi);
375 		ddi_soft_state_free(npe_statep, instance);
376 		return (DDI_SUCCESS);
377 
378 	case DDI_SUSPEND:
379 		/*
380 		 * the system might still be able to suspend/resume even if
381 		 * this fails
382 		 */
383 		(void) npe_save_htconfig_children(devi);
384 		return (DDI_SUCCESS);
385 	default:
386 		return (DDI_FAILURE);
387 	}
388 }
389 
390 /*
391  * Configure the access handle for standard configuration space
392  * access (see pci_fm_acc_setup for code that initializes the
393  * access-function pointers).
394  */
395 static int
396 npe_setup_std_pcicfg_acc(dev_info_t *rdip, ddi_map_req_t *mp,
397     ddi_acc_hdl_t *hp, off_t offset, off_t len)
398 {
399 	int ret;
400 
401 	if ((ret = pci_fm_acc_setup(hp, offset, len)) ==
402 	    DDI_SUCCESS) {
403 		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
404 		    mp->map_handlep->ah_acc.devacc_attr_access
405 		    != DDI_DEFAULT_ACC) {
406 			ndi_fmc_insert(rdip, ACC_HANDLE,
407 			    (void *)mp->map_handlep, NULL);
408 		}
409 	}
410 	return (ret);
411 }
412 
/*
 * npe_bus_map
 *
 * Bus map entry point.  Works on a private copy of the request and:
 *
 *  1. Resolves the PCI regspec, either directly (DDI_MT_REGSPEC) or by
 *     indexing rdip's "reg" property (DDI_MT_RNUMBER).  Any other map type
 *     returns DDI_ME_INVAL.
 *  2. For DDI_MO_UNMAP/DDI_MO_UNLOCK: a standard config space handle only
 *     needs to be dropped from the FM handle cache; everything else is
 *     converted to a regspec64 and passed to the parent via ddi_map().
 *  3. Rejects DDI_MO_MAP_HANDLE requests for config space.
 *  4. For config space: records bus/dev/func in the handle, then either
 *     falls back to standard config access (npe_setup_std_pcicfg_acc) or
 *     computes the memory-mapped config address from the "ecfg" property.
 *  5. Range checks offset/len, converts to a regspec64 and calls ddi_map()
 *     on the parent.  On success, config space handles get the cautious
 *     "get" routines and the handle is entered in the FM handle cache when
 *     applicable.
 *
 * Returns DDI_SUCCESS, DDI_FAILURE, DDI_ME_INVAL, or the result of
 * ddi_map()/npe_setup_std_pcicfg_acc().
 */
static int
npe_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
    off_t offset, off_t len, caddr_t *vaddrp)
{
	int 		rnumber;
	int		space;
	ddi_acc_impl_t	*ap;
	ddi_acc_hdl_t	*hp;
	ddi_map_req_t	mr;
	pci_regspec_t	pci_reg;
	pci_regspec_t	*pci_rp;
	struct regspec64 reg;
	pci_acc_cfblk_t	*cfp;
	int		retval;
	int64_t		*ecfginfo;
	uint_t		nelem;
	uint64_t	pci_rlength;

	mr = *mp; /* Get private copy of request */
	mp = &mr;

	/*
	 * check for register number
	 */
	switch (mp->map_type) {
	case DDI_MT_REGSPEC:
		/* work on a local copy of the caller's regspec */
		pci_reg = *(pci_regspec_t *)(mp->map_obj.rp);
		pci_rp = &pci_reg;
		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
			return (DDI_FAILURE);
		break;
	case DDI_MT_RNUMBER:
		rnumber = mp->map_obj.rnumber;
		/*
		 * get ALL "reg" properties for dip, select the one
		 * of interest. In x86, "assigned-addresses" property
		 * is identical to the "reg" property, so there is no
		 * need to cross check the two to determine the physical
		 * address of the registers.
		 * This routine still performs some validity checks to
		 * make sure that everything is okay.
		 */
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
		    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &nelem) !=
		    DDI_PROP_SUCCESS)
			return (DDI_FAILURE);

		/*
		 * validate the register number.
		 * (nelem is converted from a count of ints to a count of
		 * pci_regspec_t entries first)
		 */
		nelem /= (sizeof (pci_regspec_t) / sizeof (int));
		if (rnumber >= nelem) {
			ddi_prop_free(pci_rp);
			return (DDI_FAILURE);
		}

		/*
		 * copy the required entry.
		 */
		pci_reg = pci_rp[rnumber];

		/*
		 * free the memory allocated by ddi_prop_lookup_int_array
		 */
		ddi_prop_free(pci_rp);

		pci_rp = &pci_reg;
		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
			return (DDI_FAILURE);
		/* from here on the request is handled as a regspec request */
		mp->map_type = DDI_MT_REGSPEC;
		break;
	default:
		return (DDI_ME_INVAL);
	}

	space = pci_rp->pci_phys_hi & PCI_REG_ADDR_M;

	/*
	 * check for unmap and unlock of address space
	 */
	if ((mp->map_op == DDI_MO_UNMAP) || (mp->map_op == DDI_MO_UNLOCK)) {
		switch (space) {
		case PCI_ADDR_IO:
			reg.regspec_bustype = 1;
			break;

		case PCI_ADDR_CONFIG:
			/*
			 * If this is an unmap/unlock of a standard config
			 * space mapping (memory-mapped config space mappings
			 * would have the DDI_ACCATTR_CPU_VADDR bit set in the
			 * acc_attr), undo that setup here.
			 */
			if (NPE_IS_HANDLE_FOR_STDCFG_ACC(mp->map_handlep)) {

				if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
				    mp->map_handlep->ah_acc.devacc_attr_access
				    != DDI_DEFAULT_ACC) {
					ndi_fmc_remove(rdip, ACC_HANDLE,
					    (void *)mp->map_handlep);
				}
				return (DDI_SUCCESS);
			}

			/* memory-mapped config space: same size used at map */
			pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;

			/* FALLTHROUGH */
		case PCI_ADDR_MEM64:
		case PCI_ADDR_MEM32:
			reg.regspec_bustype = 0;
			break;

		default:
			return (DDI_FAILURE);
		}

		reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 |
		    (uint64_t)pci_rp->pci_phys_low;
		reg.regspec_size = (uint64_t)pci_rp->pci_size_hi << 32 |
		    (uint64_t)pci_rp->pci_size_low;

		/*
		 * Adjust offset and length
		 * A non-zero length means override the one in the regspec.
		 * (the comparison against MAX() rejects an address + offset
		 * sum that wrapped around)
		 */
		if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset))
			return (DDI_FAILURE);
		reg.regspec_addr += offset;
		if (len != 0)
			reg.regspec_size = len;

		/* hand the 64-bit regspec to the parent to do the unmap */
		mp->map_obj.rp = (struct regspec *)&reg;
		mp->map_flags |= DDI_MF_EXT_REGSPEC;
		retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
		    mp->map_handlep->ah_acc.devacc_attr_access !=
		    DDI_DEFAULT_ACC) {
			ndi_fmc_remove(rdip, ACC_HANDLE,
			    (void *)mp->map_handlep);
		}
		return (retval);

	}

	/* check for user mapping request - not legal for Config */
	if (mp->map_op == DDI_MO_MAP_HANDLE && space == PCI_ADDR_CONFIG) {
		cmn_err(CE_NOTE, "npe: Config mapping request from user\n");
		return (DDI_FAILURE);
	}


	/*
	 * Note that pci_fm_acc_setup() is called to serve two purposes
	 * i) enable legacy PCI I/O style config space access
	 * ii) register with FMA
	 */
	if (space == PCI_ADDR_CONFIG) {

		/* Can't map config space without a handle */
		hp = (ddi_acc_hdl_t *)mp->map_handlep;
		if (hp == NULL)
			return (DDI_FAILURE);

		/* record the device address for future reference */
		cfp = (pci_acc_cfblk_t *)&hp->ah_bus_private;
		cfp->c_busnum = PCI_REG_BUS_G(pci_rp->pci_phys_hi);
		cfp->c_devnum = PCI_REG_DEV_G(pci_rp->pci_phys_hi);
		cfp->c_funcnum = PCI_REG_FUNC_G(pci_rp->pci_phys_hi);

		*vaddrp = (caddr_t)offset;

		/* Check if MMCFG is supported */
		if (!npe_is_mmcfg_supported(rdip)) {
			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
			    offset, len));
		}


		/*
		 * "ecfg" is expected to hold 4 elements; [0] is used as the
		 * memory-mapped config base address and [2]/[3] as the
		 * inclusive lower/upper bus numbers it covers.
		 */
		if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, rdip, 0,
		    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {

			if (nelem != 4 ||
			    cfp->c_busnum < ecfginfo[2] ||
			    cfp->c_busnum > ecfginfo[3]) {
				/*
				 * Invalid property or Doesn't contain the
				 * requested bus; fall back to standard
				 * (I/O-based) config access.
				 */
				ddi_prop_free(ecfginfo);
				return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
				    offset, len));
			} else {
				pci_rp->pci_phys_low = ecfginfo[0];

				ddi_prop_free(ecfginfo);

				/*
				 * add the per-function offset: bus in bits
				 * 20 and up, device at bit 15, function at
				 * bit 12
				 */
				pci_rp->pci_phys_low += ((cfp->c_busnum << 20) |
				    (cfp->c_devnum) << 15 |
				    (cfp->c_funcnum << 12));

				pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
			}
		} else {
			/*
			 * Couldn't find the MMCFG property -- fall back to
			 * standard config access
			 */
			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
			    offset, len));
		}
	}

	/*
	 * range check
	 * (the last term rejects an offset + len sum that wrapped around)
	 */
	pci_rlength = (uint64_t)pci_rp->pci_size_low |
	    (uint64_t)pci_rp->pci_size_hi << 32;
	if ((offset >= pci_rlength) || (len > pci_rlength) ||
	    (offset + len > pci_rlength) || (offset + len < MAX(offset, len))) {
		return (DDI_FAILURE);
	}

	/*
	 * convert the pci regspec into the generic regspec used by the
	 * parent root nexus driver.
	 */
	switch (space) {
	case PCI_ADDR_IO:
		reg.regspec_bustype = 1;
		break;
	case PCI_ADDR_CONFIG:
	case PCI_ADDR_MEM64:
	case PCI_ADDR_MEM32:
		reg.regspec_bustype = 0;
		break;
	default:
		return (DDI_FAILURE);
	}

	reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 |
	    (uint64_t)pci_rp->pci_phys_low;
	reg.regspec_size = pci_rlength;

	/*
	 * Adjust offset and length
	 * A non-zero length means override the one in the regspec.
	 */
	if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset))
		return (DDI_FAILURE);
	reg.regspec_addr += offset;
	if (len != 0)
		reg.regspec_size = len;


	/* hand the 64-bit regspec to the parent to do the actual mapping */
	mp->map_obj.rp = (struct regspec *)&reg;
	mp->map_flags |= DDI_MF_EXT_REGSPEC;
	retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
	if (retval == DDI_SUCCESS) {
		/*
		 * For config space gets force use of cautious access routines.
		 * These will handle default and protected mode accesses too.
		 */
		if (space == PCI_ADDR_CONFIG) {
			ap = (ddi_acc_impl_t *)mp->map_handlep;
			ap->ahi_acc_attr &= ~DDI_ACCATTR_DIRECT;
			ap->ahi_acc_attr |= DDI_ACCATTR_CONFIG_SPACE;
			ap->ahi_get8 = i_ddi_caut_get8;
			ap->ahi_get16 = i_ddi_caut_get16;
			ap->ahi_get32 = i_ddi_caut_get32;
			ap->ahi_get64 = i_ddi_caut_get64;
			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
		}
		/* enter the handle in rdip's FM access handle cache */
		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
		    mp->map_handlep->ah_acc.devacc_attr_access !=
		    DDI_DEFAULT_ACC) {
			ndi_fmc_insert(rdip, ACC_HANDLE,
			    (void *)mp->map_handlep, NULL);
		}
	}
	return (retval);
}
698 
699 
700 
701 /*ARGSUSED*/
702 static int
703 npe_ctlops(dev_info_t *dip, dev_info_t *rdip,
704     ddi_ctl_enum_t ctlop, void *arg, void *result)
705 {
706 	int		totreg;
707 	uint_t		reglen;
708 	pci_regspec_t	*drv_regp;
709 	struct attachspec *asp;
710 	struct detachspec *dsp;
711 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep,
712 	    ddi_get_instance(dip));
713 
714 	switch (ctlop) {
715 	case DDI_CTLOPS_REPORTDEV:
716 		if (rdip == (dev_info_t *)0)
717 			return (DDI_FAILURE);
718 		cmn_err(CE_CONT, "?PCI Express-device: %s@%s, %s%d\n",
719 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
720 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
721 		return (DDI_SUCCESS);
722 
723 	case DDI_CTLOPS_INITCHILD:
724 		return (npe_initchild((dev_info_t *)arg));
725 
726 	case DDI_CTLOPS_UNINITCHILD:
727 		return (npe_removechild((dev_info_t *)arg));
728 
729 	case DDI_CTLOPS_SIDDEV:
730 		return (DDI_SUCCESS);
731 
732 	case DDI_CTLOPS_REGSIZE:
733 	case DDI_CTLOPS_NREGS:
734 		if (rdip == (dev_info_t *)0)
735 			return (DDI_FAILURE);
736 
737 		*(int *)result = 0;
738 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
739 		    DDI_PROP_DONTPASS, "reg", (int **)&drv_regp,
740 		    &reglen) != DDI_PROP_SUCCESS) {
741 			return (DDI_FAILURE);
742 		}
743 
744 		totreg = (reglen * sizeof (int)) / sizeof (pci_regspec_t);
745 		if (ctlop == DDI_CTLOPS_NREGS)
746 			*(int *)result = totreg;
747 		else if (ctlop == DDI_CTLOPS_REGSIZE) {
748 			uint64_t val;
749 			int rn;
750 
751 			rn = *(int *)arg;
752 			if (rn >= totreg) {
753 				ddi_prop_free(drv_regp);
754 				return (DDI_FAILURE);
755 			}
756 			val = drv_regp[rn].pci_size_low |
757 			    (uint64_t)drv_regp[rn].pci_size_hi << 32;
758 			if (val > OFF_MAX) {
759 				int ce = CE_NOTE;
760 #ifdef DEBUG
761 				ce = CE_WARN;
762 #endif
763 				dev_err(rdip, ce, "failed to get register "
764 				    "size, value larger than OFF_MAX: 0x%"
765 				    PRIx64 "\n", val);
766 				return (DDI_FAILURE);
767 			}
768 			*(off_t *)result = (off_t)val;
769 		}
770 		ddi_prop_free(drv_regp);
771 
772 		return (DDI_SUCCESS);
773 
774 	case DDI_CTLOPS_POWER:
775 	{
776 		power_req_t	*reqp = (power_req_t *)arg;
777 		/*
778 		 * We currently understand reporting of PCI_PM_IDLESPEED
779 		 * capability. Everything else is passed up.
780 		 */
781 		if ((reqp->request_type == PMR_REPORT_PMCAP) &&
782 		    (reqp->req.report_pmcap_req.cap ==  PCI_PM_IDLESPEED))
783 			return (DDI_SUCCESS);
784 
785 		break;
786 	}
787 
788 	case DDI_CTLOPS_PEEK:
789 	case DDI_CTLOPS_POKE:
790 		return (pci_common_peekpoke(dip, rdip, ctlop, arg, result));
791 
792 	/* X86 systems support PME wakeup from suspended state */
793 	case DDI_CTLOPS_ATTACH:
794 		if (!pcie_is_child(dip, rdip))
795 			return (DDI_SUCCESS);
796 
797 		asp = (struct attachspec *)arg;
798 		if ((asp->when == DDI_POST) && (asp->result == DDI_SUCCESS)) {
799 			pf_init(rdip, (void *)pci_p->pci_fm_ibc, asp->cmd);
800 			(void) pcie_postattach_child(rdip);
801 		}
802 
803 		/* only do this for immediate children */
804 		if (asp->cmd == DDI_RESUME && asp->when == DDI_PRE &&
805 		    ddi_get_parent(rdip) == dip)
806 			if (pci_pre_resume(rdip) != DDI_SUCCESS) {
807 				/* Not good, better stop now. */
808 				cmn_err(CE_PANIC,
809 				    "Couldn't pre-resume device %p",
810 				    (void *) dip);
811 				/* NOTREACHED */
812 			}
813 
814 		return (DDI_SUCCESS);
815 
816 	case DDI_CTLOPS_DETACH:
817 		if (!pcie_is_child(dip, rdip))
818 			return (DDI_SUCCESS);
819 
820 		dsp = (struct detachspec *)arg;
821 
822 		if (dsp->when == DDI_PRE)
823 			pf_fini(rdip, dsp->cmd);
824 
825 		/* only do this for immediate children */
826 		if (dsp->cmd == DDI_SUSPEND && dsp->when == DDI_POST &&
827 		    ddi_get_parent(rdip) == dip)
828 			if (pci_post_suspend(rdip) != DDI_SUCCESS)
829 				return (DDI_FAILURE);
830 
831 		return (DDI_SUCCESS);
832 
833 	default:
834 		break;
835 	}
836 
837 	return (ddi_ctlops(dip, rdip, ctlop, arg, result));
838 
839 }
840 
841 
842 /*
843  * npe_intr_ops
844  */
845 static int
846 npe_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
847     ddi_intr_handle_impl_t *hdlp, void *result)
848 {
849 	return (pci_common_intr_ops(pdip, rdip, intr_op, hdlp, result));
850 }
851 
852 
853 static int
854 npe_initchild(dev_info_t *child)
855 {
856 	char		name[80];
857 	pcie_bus_t	*bus_p;
858 	uint32_t	regs;
859 	ddi_acc_handle_t	cfg_hdl;
860 
861 	/*
862 	 * Do not bind drivers to empty bridges.
863 	 * Fail above, if the bridge is found to be hotplug capable
864 	 */
865 	if (npe_disable_empty_bridges_workaround(child) == 1)
866 		return (DDI_FAILURE);
867 
868 	if (pci_common_name_child(child, name, 80) != DDI_SUCCESS)
869 		return (DDI_FAILURE);
870 
871 	ddi_set_name_addr(child, name);
872 
873 	/*
874 	 * Pseudo nodes indicate a prototype node with per-instance
875 	 * properties to be merged into the real h/w device node.
876 	 * The interpretation of the unit-address is DD[,F]
877 	 * where DD is the device id and F is the function.
878 	 */
879 	if (ndi_dev_is_persistent_node(child) == 0) {
880 		extern int pci_allow_pseudo_children;
881 
882 		ddi_set_parent_data(child, NULL);
883 
884 		/*
885 		 * Try to merge the properties from this prototype
886 		 * node into real h/w nodes.
887 		 */
888 		if (ndi_merge_node(child, pci_common_name_child) ==
889 		    DDI_SUCCESS) {
890 			/*
891 			 * Merged ok - return failure to remove the node.
892 			 */
893 			ddi_set_name_addr(child, NULL);
894 			return (DDI_FAILURE);
895 		}
896 
897 		/* workaround for DDIVS to run under PCI Express */
898 		if (pci_allow_pseudo_children) {
899 			/*
900 			 * If the "interrupts" property doesn't exist,
901 			 * this must be the ddivs no-intr case, and it returns
902 			 * DDI_SUCCESS instead of DDI_FAILURE.
903 			 */
904 			if (ddi_prop_get_int(DDI_DEV_T_ANY, child,
905 			    DDI_PROP_DONTPASS, "interrupts", -1) == -1)
906 				return (DDI_SUCCESS);
907 			/*
908 			 * Create the ddi_parent_private_data for a pseudo
909 			 * child.
910 			 */
911 			pci_common_set_parent_private_data(child);
912 			return (DDI_SUCCESS);
913 		}
914 
915 		/*
916 		 * The child was not merged into a h/w node,
917 		 * but there's not much we can do with it other
918 		 * than return failure to cause the node to be removed.
919 		 */
920 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
921 		    ddi_get_name(child), ddi_get_name_addr(child),
922 		    ddi_get_name(child));
923 		ddi_set_name_addr(child, NULL);
924 		return (DDI_NOT_WELL_FORMED);
925 	}
926 
927 	if (ddi_prop_get_int(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
928 	    "interrupts", -1) != -1)
929 		pci_common_set_parent_private_data(child);
930 	else
931 		ddi_set_parent_data(child, NULL);
932 
933 	/* Disable certain errors on PCIe drivers for x86 platforms */
934 	regs = pcie_get_aer_uce_mask() | npe_aer_uce_mask;
935 	pcie_set_aer_uce_mask(regs);
936 	regs = pcie_get_aer_ce_mask() | npe_aer_ce_mask;
937 	pcie_set_aer_ce_mask(regs);
938 	regs = pcie_get_aer_suce_mask() | npe_aer_suce_mask;
939 	pcie_set_aer_suce_mask(regs);
940 
941 	/*
942 	 * If URs are disabled, mask SERRs as well, otherwise the system will
943 	 * still be notified of URs
944 	 */
945 	if (npe_aer_uce_mask & PCIE_AER_UCE_UR)
946 		pcie_set_serr_mask(1);
947 
948 	if (pci_config_setup(child, &cfg_hdl) == DDI_SUCCESS) {
949 		npe_ck804_fix_aer_ptr(cfg_hdl);
950 		npe_nvidia_error_workaround(cfg_hdl);
951 		npe_intel_error_workaround(cfg_hdl);
952 		pci_config_teardown(&cfg_hdl);
953 	}
954 
955 	bus_p = PCIE_DIP2BUS(child);
956 	if (bus_p) {
957 		uint16_t device_id = (uint16_t)(bus_p->bus_dev_ven_id >> 16);
958 		uint16_t vendor_id = (uint16_t)(bus_p->bus_dev_ven_id & 0xFFFF);
959 		uint16_t rev_id = bus_p->bus_rev_id;
960 
961 		/* Disable AER for certain NVIDIA Chipsets */
962 		if ((vendor_id == NVIDIA_VENDOR_ID) &&
963 		    (device_id == NVIDIA_CK804_DEVICE_ID) &&
964 		    (rev_id < NVIDIA_CK804_AER_VALID_REVID))
965 			bus_p->bus_aer_off = 0;
966 
967 		pcie_init_dom(child);
968 		(void) pcie_initchild(child);
969 	}
970 
971 	return (DDI_SUCCESS);
972 }
973 
974 
/*
 * npe_removechild
 *
 * DDI_CTLOPS_UNINITCHILD handler.  Undoes the PCIe child initialization,
 * clears the unit address and removes the node's minor nodes and
 * properties.  Always returns DDI_SUCCESS.
 */
static int
npe_removechild(dev_info_t *dip)
{
	pcie_uninitchild(dip);

	ddi_set_name_addr(dip, NULL);

	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	ddi_remove_minor_node(dip, NULL);

	ddi_prop_remove_all(dip);

	return (DDI_SUCCESS);
}
991 
992 static int
993 npe_open(dev_t *devp, int flags, int otyp, cred_t *credp)
994 {
995 	minor_t		minor = getminor(*devp);
996 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
997 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
998 	int	rv;
999 
1000 	/*
1001 	 * Make sure the open is for the right file type.
1002 	 */
1003 	if (otyp != OTYP_CHR)
1004 		return (EINVAL);
1005 
1006 	if (pci_p == NULL)
1007 		return (ENXIO);
1008 
1009 	mutex_enter(&pci_p->pci_mutex);
1010 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1011 	case PCI_TOOL_REG_MINOR_NUM:
1012 	case PCI_TOOL_INTR_MINOR_NUM:
1013 		break;
1014 	default:
1015 		/* Handle devctl ioctls */
1016 		rv = pcie_open(pci_p->pci_dip, devp, flags, otyp, credp);
1017 		mutex_exit(&pci_p->pci_mutex);
1018 		return (rv);
1019 	}
1020 
1021 	/* Handle pcitool ioctls */
1022 	if (flags & FEXCL) {
1023 		if (pci_p->pci_soft_state != PCI_SOFT_STATE_CLOSED) {
1024 			mutex_exit(&pci_p->pci_mutex);
1025 			cmn_err(CE_NOTE, "npe_open: busy");
1026 			return (EBUSY);
1027 		}
1028 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
1029 	} else {
1030 		if (pci_p->pci_soft_state == PCI_SOFT_STATE_OPEN_EXCL) {
1031 			mutex_exit(&pci_p->pci_mutex);
1032 			cmn_err(CE_NOTE, "npe_open: busy");
1033 			return (EBUSY);
1034 		}
1035 		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN;
1036 	}
1037 	mutex_exit(&pci_p->pci_mutex);
1038 
1039 	return (0);
1040 }
1041 
1042 static int
1043 npe_close(dev_t dev, int flags, int otyp, cred_t *credp)
1044 {
1045 	minor_t		minor = getminor(dev);
1046 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1047 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1048 	int	rv;
1049 
1050 	if (pci_p == NULL)
1051 		return (ENXIO);
1052 
1053 	mutex_enter(&pci_p->pci_mutex);
1054 
1055 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1056 	case PCI_TOOL_REG_MINOR_NUM:
1057 	case PCI_TOOL_INTR_MINOR_NUM:
1058 		break;
1059 	default:
1060 		/* Handle devctl ioctls */
1061 		rv = pcie_close(pci_p->pci_dip, dev, flags, otyp, credp);
1062 		mutex_exit(&pci_p->pci_mutex);
1063 		return (rv);
1064 	}
1065 
1066 	/* Handle pcitool ioctls */
1067 	pci_p->pci_soft_state = PCI_SOFT_STATE_CLOSED;
1068 	mutex_exit(&pci_p->pci_mutex);
1069 	return (0);
1070 }
1071 
1072 static int
1073 npe_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1074 {
1075 	minor_t		minor = getminor(dev);
1076 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
1077 	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
1078 	int		ret = ENOTTY;
1079 
1080 	if (pci_p == NULL)
1081 		return (ENXIO);
1082 
1083 	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
1084 	case PCI_TOOL_REG_MINOR_NUM:
1085 	case PCI_TOOL_INTR_MINOR_NUM:
1086 		/* To handle pcitool related ioctls */
1087 		ret =  pci_common_ioctl(pci_p->pci_dip, dev, cmd, arg, mode,
1088 		    credp, rvalp);
1089 		break;
1090 	default:
1091 		/* To handle devctl and hotplug related ioctls */
1092 		ret = pcie_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, credp,
1093 		    rvalp);
1094 		break;
1095 	}
1096 
1097 	return (ret);
1098 }
1099 
1100 /*ARGSUSED*/
1101 static int
1102 npe_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
1103     ddi_iblock_cookie_t *ibc)
1104 {
1105 	pci_state_t  *pcip = ddi_get_soft_state(npe_statep,
1106 	    ddi_get_instance(dip));
1107 
1108 	ASSERT(ibc != NULL);
1109 	*ibc = pcip->pci_fm_ibc;
1110 
1111 	return (pcip->pci_fmcap);
1112 }
1113 
/*
 * FM error callback registered by npe_attach() when the error-callback
 * capability was granted.  It performs no error handling and always returns
 * DDI_FM_OK.
 */
/*ARGSUSED*/
static int
npe_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *no_used)
{
	/*
	 * On current x86 systems, npe's callback does not get called for failed
	 * loads.  If in the future this feature is used, the fault PA should be
	 * logged in the derr->fme_bus_specific field.  The appropriate PCIe
	 * error handling code should be called and needs to be coordinated with
	 * safe access handling.
	 */

	return (DDI_FM_OK);
}
1128