1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. 29 * Copyright 2019 Joyent, Inc. 30 * Copyright 2024 Oxide Computer Company 31 */ 32 33 /* 34 * npe (Nexus PCIe driver): Host to PCI-Express local bus driver 35 * 36 * npe serves as the driver for PCIe Root Complexes and as the nexus driver 37 * for PCIe devices. See also: npe(4D). For more information about hotplug, 38 * see the big theory statement at uts/common/os/ddi_hp_impl.c. 39 * 40 * 41 * NDI EVENT HANDLING SUPPORT 42 * 43 * npe supports NDI event handling. The only available event is surprise 44 * removal of a device. Child drivers can register surprise removal event 45 * callbacks by requesting an event cookie using ddi_get_eventcookie for 46 * the DDI_DEVI_REMOVE_EVENT and add their callback using 47 * ddi_add_event_handler. For an example, see the nvme driver in 48 * uts/common/io/nvme/nvme.c. 
 *
 * The NDI events in npe are retrieved using NDI_EVENT_NOPASS, which
 * prevents them from being propagated up the tree once they reach npe's
 * bus_get_eventcookie operations. This is important because npe maintains
 * the state of PCIe devices and their receptacles, via the PCIe hotplug
 * controller driver (pciehpc).
 *
 * Hot removal events are ultimately posted by the PCIe hotplug controller
 * interrupt handler for hotplug events. Events are posted using the
 * ndi_post_event interface.
 */

#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/file.h>
#include <sys/pci_impl.h>
#include <sys/pcie_impl.h>
#include <sys/sysmacros.h>
#include <sys/ddi_intr.h>
#include <sys/sunndi.h>
#include <sys/sunddi.h>
#include <sys/ddifm.h>
#include <sys/ndifm.h>
#include <sys/fm/util.h>
#include <sys/hotplug/pci/pcie_hp.h>
#include <io/pci/pci_tools_ext.h>
#include <io/pci/pci_common.h>
#include <io/pciex/pcie_nvidia.h>

/*
 * Helper Macros
 */

/*
 * True when an access handle is a standard (I/O-port based) PCI config
 * space mapping: the handle's access attributes carry
 * DDI_ACCATTR_CONFIG_SPACE but not DDI_ACCATTR_CPU_VADDR (memory-mapped
 * config space mappings set DDI_ACCATTR_CPU_VADDR as well; see the
 * unmap path in npe_bus_map()).
 */
#define	NPE_IS_HANDLE_FOR_STDCFG_ACC(hp) \
	((hp) != NULL && \
	((ddi_acc_hdl_t *)(hp))->ah_platform_private != NULL && \
	(((ddi_acc_impl_t *)((ddi_acc_hdl_t *)(hp))-> \
	ah_platform_private)-> \
	ahi_acc_attr & (DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_CONFIG_SPACE)) \
	== DDI_ACCATTR_CONFIG_SPACE)

/*
 * Bus Operation functions
 */
static int	npe_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *,
		    off_t, off_t, caddr_t *);
static int	npe_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
		    void *, void *);
static int	npe_intr_ops(dev_info_t *, dev_info_t *, ddi_intr_op_t,
		    ddi_intr_handle_impl_t *, void *);
static int	npe_fm_init(dev_info_t *, dev_info_t *, int,
		    ddi_iblock_cookie_t *);
static int	npe_bus_get_eventcookie(dev_info_t *, dev_info_t *, char *,
		    ddi_eventcookie_t *);
static int	npe_bus_add_eventcall(dev_info_t *, dev_info_t *,
		    ddi_eventcookie_t, void (*)(dev_info_t *,
		    ddi_eventcookie_t, void *, void *),
		    void *, ddi_callback_id_t *);
static int	npe_bus_remove_eventcall(dev_info_t *, ddi_callback_id_t);
static int	npe_bus_post_event(dev_info_t *, dev_info_t *,
		    ddi_eventcookie_t, void *);

static int	npe_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *);

/*
 * Disable URs and Received MA for all PCIe devices.  Until x86 SW is changed
 * so that random drivers do not do PIO accesses on devices that they do not
 * own, these error bits must be disabled.  SERR must also be disabled if URs
 * have been masked.
 */
uint32_t	npe_aer_uce_mask = PCIE_AER_UCE_UR;
uint32_t	npe_aer_ce_mask = 0;
uint32_t	npe_aer_suce_mask = PCIE_AER_SUCE_RCVD_MA;

struct bus_ops npe_bus_ops = {
	BUSO_REV,
	npe_bus_map,
	NULL,
	NULL,
	NULL,
	i_ddi_map_fault,
	NULL,
	ddi_dma_allochdl,
	ddi_dma_freehdl,
	ddi_dma_bindhdl,
	ddi_dma_unbindhdl,
	ddi_dma_flush,
	ddi_dma_win,
	ddi_dma_mctl,
	npe_ctlops,
	ddi_bus_prop_op,
	npe_bus_get_eventcookie,
	npe_bus_add_eventcall,
	npe_bus_remove_eventcall,
	npe_bus_post_event,
	0,			/* (*bus_intr_ctl)(); */
	0,			/* (*bus_config)(); */
	0,			/* (*bus_unconfig)(); */
	npe_fm_init,		/* (*bus_fm_init)(); */
	NULL,			/* (*bus_fm_fini)(); */
	NULL,			/* (*bus_fm_access_enter)(); */
	NULL,			/* (*bus_fm_access_exit)(); */
	NULL,			/* (*bus_power)(); */
	npe_intr_ops,		/* (*bus_intr_op)(); */
	pcie_hp_common_ops	/* (*bus_hp_op)(); */
};

static int	npe_open(dev_t *, int, int, cred_t *);
static int	npe_close(dev_t, int, int, cred_t *);
static int	npe_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

struct cb_ops npe_cb_ops = {
	npe_open,			/* open */
	npe_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	npe_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	pcie_prop_op,			/* cb_prop_op */
	NULL,				/* streamtab */
	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/*
 * Device Node Operation functions
 */
static int	npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int	npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int	npe_info(dev_info_t *, ddi_info_cmd_t, void *, void **);

struct dev_ops npe_ops = {
	DEVO_REV,			/* devo_rev */
	0,				/* refcnt */
	npe_info,			/* info */
	nulldev,			/* identify */
	nulldev,			/* probe */
	npe_attach,			/* attach */
	npe_detach,			/* detach */
	nulldev,			/* reset */
	&npe_cb_ops,			/* driver operations */
	&npe_bus_ops,			/* bus operations */
	NULL,				/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};

/*
 * Internal routines in support of particular npe_ctlops.
 */
static int npe_removechild(dev_info_t *child);
static int npe_initchild(dev_info_t *child);

/*
 * External support routines (implemented in the npe platform workaround
 * code, e.g. pcie_nvidia.c).
 */
extern void	npe_ck804_fix_aer_ptr(ddi_acc_handle_t cfg_hdl);
extern int	npe_disable_empty_bridges_workaround(dev_info_t *child);
extern void	npe_nvidia_error_workaround(ddi_acc_handle_t cfg_hdl);
extern void	npe_intel_error_workaround(ddi_acc_handle_t cfg_hdl);
extern boolean_t npe_is_mmcfg_supported(dev_info_t *dip);
extern void	npe_enable_htmsi_children(dev_info_t *dip);
extern int	npe_save_htconfig_children(dev_info_t *dip);
extern int	npe_restore_htconfig_children(dev_info_t *dip);

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* Type of module */
	"Host to PCIe nexus driver",	/* Name of module */
	&npe_ops,			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Save minimal state: per-instance pci_state_t soft state, initialized in
 * _init() and looked up by instance number throughout the driver.
 */
void *npe_statep;

int
_init(void)
{
	int e;

	/*
	 * Initialize per-pci bus soft state pointer.
	 */
	e = ddi_soft_state_init(&npe_statep, sizeof (pci_state_t), 1);
	if (e != 0)
		return (e);

	/* Undo the soft state setup if the module fails to install. */
	if ((e = mod_install(&modlinkage)) != 0)
		ddi_soft_state_fini(&npe_statep);

	return (e);
}


int
_fini(void)
{
	int rc;

	rc = mod_remove(&modlinkage);
	if (rc != 0)
		return (rc);

	ddi_soft_state_fini(&npe_statep);
	return (rc);
}


int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * getinfo(9E) entry point: translate a dev_t into the owning instance
 * number or dev_info pointer using the per-instance soft state.
 */
/*ARGSUSED*/
static int
npe_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	minor_t		minor = getminor((dev_t)arg);
	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
	pci_state_t	*pcip = ddi_get_soft_state(npe_statep, instance);
	int		ret = DDI_SUCCESS;

	switch (cmd) {
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(intptr_t)instance;
		break;
	case DDI_INFO_DEVT2DEVINFO:
		if (pcip == NULL) {
			ret = DDI_FAILURE;
			break;
		}

		*result = (void *)pcip->pci_dip;
		break;
	default:
		ret = DDI_FAILURE;
		break;
	}

	return (ret);
}

/*
 * See big theory statement at the top of this file for more information about
 * surprise removal events.
 */
#define	NPE_EVENT_TAG_HOT_REMOVAL	0
static ndi_event_definition_t npe_ndi_event_defs[1] = {
	{NPE_EVENT_TAG_HOT_REMOVAL, DDI_DEVI_REMOVE_EVENT, EPL_KERNEL,
	NDI_EVENT_POST_TO_ALL}
};

static ndi_event_set_t npe_ndi_events = {
	NDI_EVENTS_REV1, ARRAY_SIZE(npe_ndi_event_defs), npe_ndi_event_defs
};

/*
 * attach(9E) entry point.
 *
 * DDI_RESUME restores saved HyperTransport config state of children.
 * DDI_ATTACH programs HT MSI mapping registers for children, allocates
 * soft state, initializes the PCIe framework state (pcie_init), binds the
 * surprise-removal NDI event set, sets up pcitool minor nodes and
 * registers with FMA.
 */
/*ARGSUSED*/
static int
npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int		instance = ddi_get_instance(devi);
	pci_state_t	*pcip = NULL;
	int		ret;

	if (cmd == DDI_RESUME) {
		/*
		 * the system might still be able to resume even if this fails
		 */
		(void) npe_restore_htconfig_children(devi);
		return (DDI_SUCCESS);
	}

	/*
	 * We must do this here in order to ensure that all top level devices
	 * get their HyperTransport MSI mapping regs programmed first.
	 * "Memory controller" and "hostbridge" class devices are leaf devices
	 * that may affect MSI translation functionality for devices
	 * connected to the same link/bus.
	 *
	 * This will also program HT MSI mapping registers on root buses
	 * devices (basically sitting on an HT bus) that are not dependent
	 * on the aforementioned HT devices for MSI translation.
	 */
	npe_enable_htmsi_children(devi);

	if (ddi_prop_update_string(DDI_DEV_T_NONE, devi, "device_type",
	    "pciex") != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "npe: 'device_type' prop create failed");
	}

	if (ddi_soft_state_zalloc(npe_statep, instance) == DDI_SUCCESS)
		pcip = ddi_get_soft_state(npe_statep, instance);

	if (pcip == NULL)
		return (DDI_FAILURE);

	pcip->pci_dip = devi;
	pcip->pci_soft_state = PCI_SOFT_STATE_CLOSED;

	if (pcie_init(devi, NULL) != DDI_SUCCESS)
		goto fail1;

	ret = ndi_event_alloc_hdl(pcip->pci_dip, NULL, &pcip->pci_ndi_event_hdl,
	    NDI_SLEEP);
	if (ret == NDI_SUCCESS) {
		ret = ndi_event_bind_set(pcip->pci_ndi_event_hdl,
		    &npe_ndi_events, NDI_SLEEP);
		if (ret != NDI_SUCCESS) {
			dev_err(pcip->pci_dip, CE_WARN, "npe: failed to bind "
			    "NDI event set (error=%d)", ret);
			goto fail1;
		}
	} else {
		dev_err(pcip->pci_dip, CE_WARN, "npe: failed to allocate "
		    "event handle (error=%d)", ret);
		goto fail1;
	}

	/* Second arg: initialize for pci_express root nexus */
	if (pcitool_init(devi, B_TRUE) != DDI_SUCCESS)
		goto fail2;

	pcip->pci_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
	ddi_fm_init(devi, &pcip->pci_fmcap, &pcip->pci_fm_ibc);

	if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) {
		ddi_fm_handler_register(devi, npe_fm_callback, NULL);
	}

	PCIE_DIP2PFD(devi) = kmem_zalloc(sizeof (pf_data_t), KM_SLEEP);
	pcie_rc_init_pfd(devi, PCIE_DIP2PFD(devi));

	ddi_report_dev(devi);
	pcie_fab_init_bus(devi, PCIE_BUS_FINAL);

	return (DDI_SUCCESS);

fail2:
	(void) pcie_uninit(devi);
fail1:
	/*
	 * NOTE(review): once ndi_event_alloc_hdl() has succeeded, these
	 * failure paths do not unbind/free pci_ndi_event_hdl -- confirm
	 * whether the handle should be released here.
	 */
	pcie_rc_fini_bus(devi);
	ddi_soft_state_free(npe_statep, instance);

	return (DDI_FAILURE);
}

/*
 * detach(9E) entry point.
 *
 * DDI_SUSPEND saves child HyperTransport config state.  DDI_DETACH tears
 * down the NDI event set (failing the detach if callbacks are still
 * registered), pcitool nodes, PCIe framework state and FMA registration,
 * then frees the instance soft state.
 */
/*ARGSUSED*/
static int
npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(devi);
	pci_state_t *pcip;
	int ret;

	pcip = ddi_get_soft_state(npe_statep, ddi_get_instance(devi));

	switch (cmd) {
	case DDI_DETACH:

		/*
		 * Clean up event handling first, to ensure there are no
		 * outstanding callbacks registered.
		 */
		ret = ndi_event_unbind_set(pcip->pci_ndi_event_hdl,
		    &npe_ndi_events, NDI_SLEEP);
		if (ret == NDI_SUCCESS) {
			/* ndi_event_free_hdl always succeeds. */
			(void) ndi_event_free_hdl(pcip->pci_ndi_event_hdl);
		} else {
			/*
			 * The event set will only fail to unbind if there are
			 * outstanding callbacks registered for it, which
			 * probably means a child driver still has one
			 * registered and thus was not cleaned up properly
			 * before npe's detach routine was called. Consequently,
			 * we should fail the detach here.
			 */
			dev_err(pcip->pci_dip, CE_WARN, "npe: failed to "
			    "unbind NDI event set (error=%d)", ret);
			return (DDI_FAILURE);
		}

		pcie_fab_fini_bus(devi, PCIE_BUS_INITIAL);

		/* Uninitialize pcitool support. */
		pcitool_uninit(devi);

		if (pcie_uninit(devi) != DDI_SUCCESS)
			return (DDI_FAILURE);

		if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE)
			ddi_fm_handler_unregister(devi);

		pcie_rc_fini_pfd(PCIE_DIP2PFD(devi));
		kmem_free(PCIE_DIP2PFD(devi), sizeof (pf_data_t));

		ddi_fm_fini(devi);
		ddi_soft_state_free(npe_statep, instance);

		return (DDI_SUCCESS);

	case DDI_SUSPEND:
		/*
		 * the system might still be able to suspend/resume even if
		 * this fails
		 */
		(void) npe_save_htconfig_children(devi);
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

/*
 * Configure the access handle for standard configuration space
 * access (see pci_fm_acc_setup for code that initializes the
 * access-function pointers).
 */
static int
npe_setup_std_pcicfg_acc(dev_info_t *rdip, ddi_map_req_t *mp,
    ddi_acc_hdl_t *hp, off_t offset, off_t len)
{
	int ret;

	if ((ret = pci_fm_acc_setup(hp, offset, len)) ==
	    DDI_SUCCESS) {
		/* Register the handle with FMA if the child is FM-capable. */
		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
		    mp->map_handlep->ah_acc.devacc_attr_access
		    != DDI_DEFAULT_ACC) {
			ndi_fmc_insert(rdip, ACC_HANDLE,
			    (void *)mp->map_handlep, NULL);
		}
	}
	return (ret);
}

/*
 * bus_map(9E) entry point: map a child's register set (I/O, memory or
 * config space) into the caller's address space.  Config space is mapped
 * either memory-mapped (MMCFG, via the "ecfg" property) or via legacy
 * I/O-port access (npe_setup_std_pcicfg_acc).  Non-config requests are
 * translated to a regspec64 and passed to the parent via ddi_map().
 */
static int
npe_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
    off_t offset, off_t len, caddr_t *vaddrp)
{
	int		rnumber;
	int		space;
	ddi_acc_impl_t	*ap;
	ddi_acc_hdl_t	*hp;
	ddi_map_req_t	mr;
	pci_regspec_t	pci_reg;
	pci_regspec_t	*pci_rp;
	struct regspec64 reg;
	pci_acc_cfblk_t	*cfp;
	int		retval;
	int64_t		*ecfginfo;
	uint_t		nelem;
	uint64_t	pci_rlength;

	mr = *mp; /* Get private copy of request */
	mp = &mr;

	/*
	 * check for register number
	 */
	switch (mp->map_type) {
	case DDI_MT_REGSPEC:
		pci_reg = *(pci_regspec_t *)(mp->map_obj.rp);
		pci_rp = &pci_reg;
		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
			return (DDI_FAILURE);
		break;
	case DDI_MT_RNUMBER:
		rnumber = mp->map_obj.rnumber;
		/*
		 * get ALL "reg" properties for dip, select the one of
		 * of interest. In x86, "assigned-addresses" property
		 * is identical to the "reg" property, so there is no
		 * need to cross check the two to determine the physical
		 * address of the registers.
		 * This routine still performs some validity checks to
		 * make sure that everything is okay.
		 */
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip,
		    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &nelem) !=
		    DDI_PROP_SUCCESS)
			return (DDI_FAILURE);

		/*
		 * validate the register number.
		 */
		nelem /= (sizeof (pci_regspec_t) / sizeof (int));
		if (rnumber >= nelem) {
			ddi_prop_free(pci_rp);
			return (DDI_FAILURE);
		}

		/*
		 * copy the required entry.
		 */
		pci_reg = pci_rp[rnumber];

		/*
		 * free the memory allocated by ddi_prop_lookup_int_array
		 */
		ddi_prop_free(pci_rp);

		pci_rp = &pci_reg;
		if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS)
			return (DDI_FAILURE);
		mp->map_type = DDI_MT_REGSPEC;
		break;
	default:
		return (DDI_ME_INVAL);
	}

	space = pci_rp->pci_phys_hi & PCI_REG_ADDR_M;

	/*
	 * check for unmap and unlock of address space
	 */
	if ((mp->map_op == DDI_MO_UNMAP) || (mp->map_op == DDI_MO_UNLOCK)) {
		switch (space) {
		case PCI_ADDR_IO:
			reg.regspec_bustype = 1;
			break;

		case PCI_ADDR_CONFIG:
			/*
			 * If this is an unmap/unlock of a standard config
			 * space mapping (memory-mapped config space mappings
			 * would have the DDI_ACCATTR_CPU_VADDR bit set in the
			 * acc_attr), undo that setup here.
			 */
			if (NPE_IS_HANDLE_FOR_STDCFG_ACC(mp->map_handlep)) {

				if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
				    mp->map_handlep->ah_acc.devacc_attr_access
				    != DDI_DEFAULT_ACC) {
					ndi_fmc_remove(rdip, ACC_HANDLE,
					    (void *)mp->map_handlep);
				}
				return (DDI_SUCCESS);
			}

			pci_rp->pci_size_hi = 0;
			pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;

			/* FALLTHROUGH */
		case PCI_ADDR_MEM64:
		case PCI_ADDR_MEM32:
			reg.regspec_bustype = 0;
			break;

		default:
			return (DDI_FAILURE);
		}

		reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 |
		    (uint64_t)pci_rp->pci_phys_low;
		reg.regspec_size = (uint64_t)pci_rp->pci_size_hi << 32 |
		    (uint64_t)pci_rp->pci_size_low;

		/*
		 * Adjust offset and length
		 * A non-zero length means override the one in the regspec.
		 * The addition overflow check guards against wraparound.
		 */
		if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset))
			return (DDI_FAILURE);
		reg.regspec_addr += offset;
		if (len != 0)
			reg.regspec_size = len;

		mp->map_obj.rp = (struct regspec *)&reg;
		mp->map_flags |= DDI_MF_EXT_REGSPEC;
		retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
		    mp->map_handlep->ah_acc.devacc_attr_access !=
		    DDI_DEFAULT_ACC) {
			ndi_fmc_remove(rdip, ACC_HANDLE,
			    (void *)mp->map_handlep);
		}
		return (retval);

	}

	/* check for user mapping request - not legal for Config */
	if (mp->map_op == DDI_MO_MAP_HANDLE && space == PCI_ADDR_CONFIG) {
		cmn_err(CE_NOTE, "npe: Config mapping request from user\n");
		return (DDI_FAILURE);
	}


	/*
	 * Note that pci_fm_acc_setup() is called to serve two purposes
	 * i) enable legacy PCI I/O style config space access
	 * ii) register with FMA
	 */
	if (space == PCI_ADDR_CONFIG) {

		/* Can't map config space without a handle */
		hp = (ddi_acc_hdl_t *)mp->map_handlep;
		if (hp == NULL)
			return (DDI_FAILURE);

		/* record the device address for future reference */
		cfp = (pci_acc_cfblk_t *)&hp->ah_bus_private;
		cfp->c_busnum = PCI_REG_BUS_G(pci_rp->pci_phys_hi);
		cfp->c_devnum = PCI_REG_DEV_G(pci_rp->pci_phys_hi);
		cfp->c_funcnum = PCI_REG_FUNC_G(pci_rp->pci_phys_hi);

		*vaddrp = (caddr_t)offset;

		/* Check if MMCFG is supported */
		if (!npe_is_mmcfg_supported(rdip)) {
			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
			    offset, len));
		}


		if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, rdip, 0,
		    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {

			if (nelem != 4 ||
			    cfp->c_busnum < ecfginfo[2] ||
			    cfp->c_busnum > ecfginfo[3]) {
				/*
				 * Invalid property or Doesn't contain the
				 * requested bus; fall back to standard
				 * (I/O-based) config access.
				 */
				ddi_prop_free(ecfginfo);
				return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
				    offset, len));
			} else {
				uint64_t addr = (uint64_t)ecfginfo[0];

				/*
				 * The address for memory mapped configuration
				 * space may theoretically be anywhere in the
				 * processor's physical address space.
				 *
				 * We need to set both phys_mid and phys_low to
				 * account for this. Because we are mapping a
				 * single device, which has 1 KiB region and
				 * alignment requirements, along with the fact
				 * that we only allow for segment 0, means that
				 * the offset will always fit in the lower
				 * 32-bit word.
				 */
				pci_rp->pci_phys_mid = (uint32_t)(addr >> 32);
				pci_rp->pci_phys_low = (uint32_t)addr;

				ddi_prop_free(ecfginfo);

				/* bus/dev/func select within the ECAM window */
				pci_rp->pci_phys_low += ((cfp->c_busnum << 20) |
				    (cfp->c_devnum) << 15 |
				    (cfp->c_funcnum << 12));

				pci_rp->pci_size_hi = 0;
				pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE;
			}
		} else {
			/*
			 * Couldn't find the MMCFG property -- fall back to
			 * standard config access
			 */
			return (npe_setup_std_pcicfg_acc(rdip, mp, hp,
			    offset, len));
		}
	}

	/*
	 * range check
	 */
	pci_rlength = (uint64_t)pci_rp->pci_size_low |
	    (uint64_t)pci_rp->pci_size_hi << 32;
	if ((offset >= pci_rlength) || (len > pci_rlength) ||
	    (offset + len > pci_rlength) || (offset + len < MAX(offset, len))) {
		return (DDI_FAILURE);
	}

	/*
	 * convert the pci regsec into the generic regspec used by the
	 * parent root nexus driver.
	 */
	switch (space) {
	case PCI_ADDR_IO:
		reg.regspec_bustype = 1;
		break;
	case PCI_ADDR_CONFIG:
	case PCI_ADDR_MEM64:
	case PCI_ADDR_MEM32:
		reg.regspec_bustype = 0;
		break;
	default:
		return (DDI_FAILURE);
	}

	reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 |
	    (uint64_t)pci_rp->pci_phys_low;
	reg.regspec_size = pci_rlength;

	/*
	 * Adjust offset and length
	 * A non-zero length means override the one in the regspec.
	 */
	if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset))
		return (DDI_FAILURE);
	reg.regspec_addr += offset;
	if (len != 0)
		reg.regspec_size = len;


	mp->map_obj.rp = (struct regspec *)&reg;
	mp->map_flags |= DDI_MF_EXT_REGSPEC;
	retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp);
	if (retval == DDI_SUCCESS) {
		/*
		 * For config space gets force use of cautious access routines.
		 * These will handle default and protected mode accesses too.
		 */
		if (space == PCI_ADDR_CONFIG) {
			ap = (ddi_acc_impl_t *)mp->map_handlep;
			ap->ahi_acc_attr &= ~DDI_ACCATTR_DIRECT;
			ap->ahi_acc_attr |= DDI_ACCATTR_CONFIG_SPACE;
			ap->ahi_get8 = i_ddi_caut_get8;
			ap->ahi_get16 = i_ddi_caut_get16;
			ap->ahi_get32 = i_ddi_caut_get32;
			ap->ahi_get64 = i_ddi_caut_get64;
			ap->ahi_rep_get8 = i_ddi_caut_rep_get8;
			ap->ahi_rep_get16 = i_ddi_caut_rep_get16;
			ap->ahi_rep_get32 = i_ddi_caut_rep_get32;
			ap->ahi_rep_get64 = i_ddi_caut_rep_get64;
		}
		if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) &&
		    mp->map_handlep->ah_acc.devacc_attr_access !=
		    DDI_DEFAULT_ACC) {
			ndi_fmc_insert(rdip, ACC_HANDLE,
			    (void *)mp->map_handlep, NULL);
		}
	}
	return (retval);
}



/*
 * bus_ctl(9E) entry point: handle DDI control operations for children.
 */
/*ARGSUSED*/
static int
npe_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result)
{
	int		totreg;
	uint_t		reglen;
	pci_regspec_t	*drv_regp;
	struct attachspec *asp;
struct detachspec *dsp; 818 pci_state_t *pci_p = ddi_get_soft_state(npe_statep, 819 ddi_get_instance(dip)); 820 821 switch (ctlop) { 822 case DDI_CTLOPS_REPORTDEV: 823 if (rdip == (dev_info_t *)0) 824 return (DDI_FAILURE); 825 cmn_err(CE_CONT, "?PCI Express-device: %s@%s, %s%d\n", 826 ddi_node_name(rdip), ddi_get_name_addr(rdip), 827 ddi_driver_name(rdip), ddi_get_instance(rdip)); 828 return (DDI_SUCCESS); 829 830 case DDI_CTLOPS_INITCHILD: 831 return (npe_initchild((dev_info_t *)arg)); 832 833 case DDI_CTLOPS_UNINITCHILD: 834 return (npe_removechild((dev_info_t *)arg)); 835 836 case DDI_CTLOPS_SIDDEV: 837 return (DDI_SUCCESS); 838 839 case DDI_CTLOPS_REGSIZE: 840 case DDI_CTLOPS_NREGS: 841 if (rdip == (dev_info_t *)0) 842 return (DDI_FAILURE); 843 844 *(int *)result = 0; 845 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip, 846 DDI_PROP_DONTPASS, "reg", (int **)&drv_regp, 847 ®len) != DDI_PROP_SUCCESS) { 848 return (DDI_FAILURE); 849 } 850 851 totreg = (reglen * sizeof (int)) / sizeof (pci_regspec_t); 852 if (ctlop == DDI_CTLOPS_NREGS) 853 *(int *)result = totreg; 854 else if (ctlop == DDI_CTLOPS_REGSIZE) { 855 uint64_t val; 856 int rn; 857 858 rn = *(int *)arg; 859 if (rn >= totreg) { 860 ddi_prop_free(drv_regp); 861 return (DDI_FAILURE); 862 } 863 val = drv_regp[rn].pci_size_low | 864 (uint64_t)drv_regp[rn].pci_size_hi << 32; 865 if (val > OFF_MAX) { 866 int ce = CE_NOTE; 867 #ifdef DEBUG 868 ce = CE_WARN; 869 #endif 870 dev_err(rdip, ce, "failed to get register " 871 "size, value larger than OFF_MAX: 0x%" 872 PRIx64 "\n", val); 873 return (DDI_FAILURE); 874 } 875 *(off_t *)result = (off_t)val; 876 } 877 ddi_prop_free(drv_regp); 878 879 return (DDI_SUCCESS); 880 881 case DDI_CTLOPS_POWER: 882 { 883 power_req_t *reqp = (power_req_t *)arg; 884 /* 885 * We currently understand reporting of PCI_PM_IDLESPEED 886 * capability. Everything else is passed up. 
887 */ 888 if ((reqp->request_type == PMR_REPORT_PMCAP) && 889 (reqp->req.report_pmcap_req.cap == PCI_PM_IDLESPEED)) 890 return (DDI_SUCCESS); 891 892 break; 893 } 894 895 case DDI_CTLOPS_PEEK: 896 case DDI_CTLOPS_POKE: 897 return (pci_common_peekpoke(dip, rdip, ctlop, arg, result)); 898 899 /* X86 systems support PME wakeup from suspended state */ 900 case DDI_CTLOPS_ATTACH: 901 if (!pcie_is_child(dip, rdip)) 902 return (DDI_SUCCESS); 903 904 asp = (struct attachspec *)arg; 905 if ((asp->when == DDI_POST) && (asp->result == DDI_SUCCESS)) { 906 pf_init(rdip, (void *)pci_p->pci_fm_ibc, asp->cmd); 907 (void) pcie_postattach_child(rdip); 908 } 909 910 /* only do this for immediate children */ 911 if (asp->cmd == DDI_RESUME && asp->when == DDI_PRE && 912 ddi_get_parent(rdip) == dip) 913 if (pci_pre_resume(rdip) != DDI_SUCCESS) { 914 /* Not good, better stop now. */ 915 cmn_err(CE_PANIC, 916 "Couldn't pre-resume device %p", 917 (void *) dip); 918 /* NOTREACHED */ 919 } 920 921 return (DDI_SUCCESS); 922 923 case DDI_CTLOPS_DETACH: 924 if (!pcie_is_child(dip, rdip)) 925 return (DDI_SUCCESS); 926 927 dsp = (struct detachspec *)arg; 928 929 if (dsp->when == DDI_PRE) 930 pf_fini(rdip, dsp->cmd); 931 932 /* only do this for immediate children */ 933 if (dsp->cmd == DDI_SUSPEND && dsp->when == DDI_POST && 934 ddi_get_parent(rdip) == dip) 935 if (pci_post_suspend(rdip) != DDI_SUCCESS) 936 return (DDI_FAILURE); 937 938 return (DDI_SUCCESS); 939 940 default: 941 break; 942 } 943 944 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 945 946 } 947 948 949 /* 950 * npe_intr_ops 951 */ 952 static int 953 npe_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op, 954 ddi_intr_handle_impl_t *hdlp, void *result) 955 { 956 return (pci_common_intr_ops(pdip, rdip, intr_op, hdlp, result)); 957 } 958 959 960 static int 961 npe_initchild(dev_info_t *child) 962 { 963 char name[80]; 964 pcie_bus_t *bus_p; 965 uint32_t regs; 966 ddi_acc_handle_t cfg_hdl; 967 968 /* 969 * Do not 
bind drivers to empty bridges. 970 * Fail above, if the bridge is found to be hotplug capable 971 */ 972 if (npe_disable_empty_bridges_workaround(child) == 1) 973 return (DDI_FAILURE); 974 975 if (pci_common_name_child(child, name, 80) != DDI_SUCCESS) 976 return (DDI_FAILURE); 977 978 ddi_set_name_addr(child, name); 979 980 /* 981 * Pseudo nodes indicate a prototype node with per-instance 982 * properties to be merged into the real h/w device node. 983 * The interpretation of the unit-address is DD[,F] 984 * where DD is the device id and F is the function. 985 */ 986 if (ndi_dev_is_persistent_node(child) == 0) { 987 extern int pci_allow_pseudo_children; 988 989 ddi_set_parent_data(child, NULL); 990 991 /* 992 * Try to merge the properties from this prototype 993 * node into real h/w nodes. 994 */ 995 if (ndi_merge_node(child, pci_common_name_child) == 996 DDI_SUCCESS) { 997 /* 998 * Merged ok - return failure to remove the node. 999 */ 1000 ddi_set_name_addr(child, NULL); 1001 return (DDI_FAILURE); 1002 } 1003 1004 /* workaround for DDIVS to run under PCI Express */ 1005 if (pci_allow_pseudo_children) { 1006 /* 1007 * If the "interrupts" property doesn't exist, 1008 * this must be the ddivs no-intr case, and it returns 1009 * DDI_SUCCESS instead of DDI_FAILURE. 1010 */ 1011 if (ddi_prop_get_int(DDI_DEV_T_ANY, child, 1012 DDI_PROP_DONTPASS, "interrupts", -1) == -1) 1013 return (DDI_SUCCESS); 1014 /* 1015 * Create the ddi_parent_private_data for a pseudo 1016 * child. 1017 */ 1018 pci_common_set_parent_private_data(child); 1019 return (DDI_SUCCESS); 1020 } 1021 1022 /* 1023 * The child was not merged into a h/w node, 1024 * but there's not much we can do with it other 1025 * than return failure to cause the node to be removed. 
1026 */ 1027 cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged", 1028 ddi_get_name(child), ddi_get_name_addr(child), 1029 ddi_get_name(child)); 1030 ddi_set_name_addr(child, NULL); 1031 return (DDI_NOT_WELL_FORMED); 1032 } 1033 1034 if (ddi_prop_get_int(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS, 1035 "interrupts", -1) != -1) 1036 pci_common_set_parent_private_data(child); 1037 else 1038 ddi_set_parent_data(child, NULL); 1039 1040 /* Disable certain errors on PCIe drivers for x86 platforms */ 1041 regs = pcie_get_aer_uce_mask() | npe_aer_uce_mask; 1042 pcie_set_aer_uce_mask(regs); 1043 regs = pcie_get_aer_ce_mask() | npe_aer_ce_mask; 1044 pcie_set_aer_ce_mask(regs); 1045 regs = pcie_get_aer_suce_mask() | npe_aer_suce_mask; 1046 pcie_set_aer_suce_mask(regs); 1047 1048 /* 1049 * If URs are disabled, mask SERRs as well, otherwise the system will 1050 * still be notified of URs 1051 */ 1052 if (npe_aer_uce_mask & PCIE_AER_UCE_UR) 1053 pcie_set_serr_mask(1); 1054 1055 if (pci_config_setup(child, &cfg_hdl) == DDI_SUCCESS) { 1056 npe_ck804_fix_aer_ptr(cfg_hdl); 1057 npe_nvidia_error_workaround(cfg_hdl); 1058 npe_intel_error_workaround(cfg_hdl); 1059 pci_config_teardown(&cfg_hdl); 1060 } 1061 1062 bus_p = PCIE_DIP2BUS(child); 1063 if (bus_p) { 1064 uint16_t device_id = (uint16_t)(bus_p->bus_dev_ven_id >> 16); 1065 uint16_t vendor_id = (uint16_t)(bus_p->bus_dev_ven_id & 0xFFFF); 1066 uint16_t rev_id = bus_p->bus_rev_id; 1067 1068 /* Disable AER for certain NVIDIA Chipsets */ 1069 if ((vendor_id == NVIDIA_VENDOR_ID) && 1070 (device_id == NVIDIA_CK804_DEVICE_ID) && 1071 (rev_id < NVIDIA_CK804_AER_VALID_REVID)) 1072 bus_p->bus_aer_off = 0; 1073 1074 pcie_init_dom(child); 1075 (void) pcie_initchild(child); 1076 } 1077 1078 return (DDI_SUCCESS); 1079 } 1080 1081 1082 static int 1083 npe_removechild(dev_info_t *dip) 1084 { 1085 pcie_uninitchild(dip); 1086 1087 ddi_set_name_addr(dip, NULL); 1088 1089 /* 1090 * Strip the node to properly convert it back to prototype form 1091 */ 
	ddi_remove_minor_node(dip, NULL);

	ddi_prop_remove_all(dip);

	return (DDI_SUCCESS);
}

/*
 * open(9E) entry point.  pcitool minors are tracked with an open/excl
 * state machine under pci_mutex; all other minors are devctl/hotplug
 * nodes handled by the PCIe framework (pcie_open).
 */
static int
npe_open(dev_t *devp, int flags, int otyp, cred_t *credp)
{
	minor_t		minor = getminor(*devp);
	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
	int	rv;

	/*
	 * Make sure the open is for the right file type.
	 */
	if (otyp != OTYP_CHR)
		return (EINVAL);

	if (pci_p == NULL)
		return (ENXIO);

	mutex_enter(&pci_p->pci_mutex);
	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
	case PCI_TOOL_REG_MINOR_NUM:
	case PCI_TOOL_INTR_MINOR_NUM:
		break;
	default:
		/* Handle devctl ioctls */
		rv = pcie_open(pci_p->pci_dip, devp, flags, otyp, credp);
		mutex_exit(&pci_p->pci_mutex);
		return (rv);
	}

	/* Handle pcitool ioctls */
	if (flags & FEXCL) {
		if (pci_p->pci_soft_state != PCI_SOFT_STATE_CLOSED) {
			mutex_exit(&pci_p->pci_mutex);
			cmn_err(CE_NOTE, "npe_open: busy");
			return (EBUSY);
		}
		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
	} else {
		if (pci_p->pci_soft_state == PCI_SOFT_STATE_OPEN_EXCL) {
			mutex_exit(&pci_p->pci_mutex);
			cmn_err(CE_NOTE, "npe_open: busy");
			return (EBUSY);
		}
		pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN;
	}
	mutex_exit(&pci_p->pci_mutex);

	return (0);
}

/*
 * close(9E) entry point; resets pcitool open state or delegates to
 * pcie_close for devctl minors.
 */
static int
npe_close(dev_t dev, int flags, int otyp, cred_t *credp)
{
	minor_t		minor = getminor(dev);
	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
	int	rv;

	if (pci_p == NULL)
		return (ENXIO);

	mutex_enter(&pci_p->pci_mutex);

	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
	case PCI_TOOL_REG_MINOR_NUM:
	case PCI_TOOL_INTR_MINOR_NUM:
		break;
	default:
		/* Handle devctl ioctls */
		rv = pcie_close(pci_p->pci_dip, dev, flags, otyp, credp);
		mutex_exit(&pci_p->pci_mutex);
		return (rv);
	}

	/* Handle pcitool ioctls */
	pci_p->pci_soft_state = PCI_SOFT_STATE_CLOSED;
	mutex_exit(&pci_p->pci_mutex);
	return (0);
}

/*
 * ioctl(9E) entry point: dispatch pcitool minors to pci_common_ioctl and
 * everything else (devctl/hotplug) to pcie_ioctl.
 */
static int
npe_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	minor_t		minor = getminor(dev);
	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
	pci_state_t	*pci_p = ddi_get_soft_state(npe_statep, instance);
	int		ret = ENOTTY;

	if (pci_p == NULL)
		return (ENXIO);

	switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) {
	case PCI_TOOL_REG_MINOR_NUM:
	case PCI_TOOL_INTR_MINOR_NUM:
		/* To handle pcitool related ioctls */
		ret = pci_common_ioctl(pci_p->pci_dip, dev, cmd, arg, mode,
		    credp, rvalp);
		break;
	default:
		/* To handle devctl and hotplug related ioctls */
		ret = pcie_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, credp,
		    rvalp);
		break;
	}

	return (ret);
}

/*
 * bus_fm_init(9E) entry point: hand the child our FM iblock cookie and
 * report this nexus' FM capability level.
 */
/*ARGSUSED*/
static int
npe_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
    ddi_iblock_cookie_t *ibc)
{
	pci_state_t  *pcip = ddi_get_soft_state(npe_statep,
	    ddi_get_instance(dip));

	ASSERT(ibc != NULL);
	*ibc = pcip->pci_fm_ibc;

	return (pcip->pci_fmcap);
}

/*
 * bus_get_eventcookie(9E) entry point.  NDI_EVENT_NOPASS keeps event
 * requests from propagating above npe; see the theory statement at the
 * top of this file.
 */
static int
npe_bus_get_eventcookie(dev_info_t *dip, dev_info_t *rdip, char *eventname,
    ddi_eventcookie_t *cookiep)
{
	pci_state_t *pcip = ddi_get_soft_state(npe_statep,
	    ddi_get_instance(dip));

	return (ndi_event_retrieve_cookie(pcip->pci_ndi_event_hdl, rdip,
	    eventname, cookiep, NDI_EVENT_NOPASS));
}

static int
npe_bus_add_eventcall(dev_info_t *dip, dev_info_t *rdip,
    ddi_eventcookie_t cookie, void (*callback)(dev_info_t *dip,
    ddi_eventcookie_t cookie, void *arg, void *bus_impldata),
1236 void *arg, ddi_callback_id_t *cb_id) 1237 { 1238 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1239 ddi_get_instance(dip)); 1240 1241 return (ndi_event_add_callback(pcip->pci_ndi_event_hdl, rdip, cookie, 1242 callback, arg, NDI_SLEEP, cb_id)); 1243 } 1244 1245 static int 1246 npe_bus_remove_eventcall(dev_info_t *dip, ddi_callback_id_t cb_id) 1247 { 1248 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1249 ddi_get_instance(dip)); 1250 return (ndi_event_remove_callback(pcip->pci_ndi_event_hdl, cb_id)); 1251 } 1252 1253 static int 1254 npe_bus_post_event(dev_info_t *dip, dev_info_t *rdip, 1255 ddi_eventcookie_t cookie, void *impl_data) 1256 { 1257 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1258 ddi_get_instance(dip)); 1259 return (ndi_event_do_callback(pcip->pci_ndi_event_hdl, rdip, cookie, 1260 impl_data)); 1261 1262 } 1263 1264 /*ARGSUSED*/ 1265 static int 1266 npe_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *no_used) 1267 { 1268 /* 1269 * On current x86 systems, npe's callback does not get called for failed 1270 * loads. If in the future this feature is used, the fault PA should be 1271 * logged in the derr->fme_bus_specific field. The appropriate PCIe 1272 * error handling code should be called and needs to be coordinated with 1273 * safe access handling. 1274 */ 1275 1276 return (DDI_FM_OK); 1277 } 1278