/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

/*
 * npe (Nexus PCIe driver): Host to PCI-Express local bus driver
 *
 * npe serves as the driver for PCIe Root Complexes and as the nexus driver
 * for PCIe devices. See also: npe(7D). For more information about hotplug,
 * see the big theory statement at uts/common/os/ddi_hp_impl.c.
 *
 *
 * NDI EVENT HANDLING SUPPORT
 *
 * npe supports NDI event handling. The only available event is surprise
 * removal of a device. Child drivers can register surprise removal event
 * callbacks by requesting an event cookie using ddi_get_eventcookie for
 * the DDI_DEVI_REMOVE_EVENT and add their callback using
 * ddi_add_event_handler. For an example, see the nvme driver in
 * uts/common/io/nvme/nvme.c.
48 * 49 * The NDI events in npe are retrieved using NDI_EVENT_NOPASS, which 50 * prevent them from being propagated up the tree once they reach the npe's 51 * bus_get_eventcookie operations. This is important because npe maintains 52 * the state of PCIe devices and their receptacles, via the PCIe hotplug 53 * controller driver (pciehpc). 54 * 55 * Hot removal events are ultimately posted by the PCIe hotplug controller 56 * interrupt handler for hotplug events. Events are posted using the 57 * ndi_post_event interface. 58 */ 59 60 #include <sys/conf.h> 61 #include <sys/modctl.h> 62 #include <sys/file.h> 63 #include <sys/pci_impl.h> 64 #include <sys/pcie_impl.h> 65 #include <sys/sysmacros.h> 66 #include <sys/ddi_intr.h> 67 #include <sys/sunndi.h> 68 #include <sys/sunddi.h> 69 #include <sys/ddifm.h> 70 #include <sys/ndifm.h> 71 #include <sys/fm/util.h> 72 #include <sys/hotplug/pci/pcie_hp.h> 73 #include <io/pci/pci_tools_ext.h> 74 #include <io/pci/pci_common.h> 75 #include <io/pciex/pcie_nvidia.h> 76 77 /* 78 * Helper Macros 79 */ 80 #define NPE_IS_HANDLE_FOR_STDCFG_ACC(hp) \ 81 ((hp) != NULL && \ 82 ((ddi_acc_hdl_t *)(hp))->ah_platform_private != NULL && \ 83 (((ddi_acc_impl_t *)((ddi_acc_hdl_t *)(hp))-> \ 84 ah_platform_private)-> \ 85 ahi_acc_attr &(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_CONFIG_SPACE)) \ 86 == DDI_ACCATTR_CONFIG_SPACE) 87 88 /* 89 * Bus Operation functions 90 */ 91 static int npe_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *, 92 off_t, off_t, caddr_t *); 93 static int npe_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, 94 void *, void *); 95 static int npe_intr_ops(dev_info_t *, dev_info_t *, ddi_intr_op_t, 96 ddi_intr_handle_impl_t *, void *); 97 static int npe_fm_init(dev_info_t *, dev_info_t *, int, 98 ddi_iblock_cookie_t *); 99 static int npe_bus_get_eventcookie(dev_info_t *, dev_info_t *, char *, 100 ddi_eventcookie_t *); 101 static int npe_bus_add_eventcall(dev_info_t *, dev_info_t *, 102 ddi_eventcookie_t, void (*)(dev_info_t *, 103 
ddi_eventcookie_t, void *, void *), 104 void *, ddi_callback_id_t *); 105 static int npe_bus_remove_eventcall(dev_info_t *, ddi_callback_id_t); 106 static int npe_bus_post_event(dev_info_t *, dev_info_t *, 107 ddi_eventcookie_t, void *); 108 109 static int npe_fm_callback(dev_info_t *, ddi_fm_error_t *, const void *); 110 111 /* 112 * Disable URs and Received MA for all PCIe devices. Until x86 SW is changed so 113 * that random drivers do not do PIO accesses on devices that it does not own, 114 * these error bits must be disabled. SERR must also be disabled if URs have 115 * been masked. 116 */ 117 uint32_t npe_aer_uce_mask = PCIE_AER_UCE_UR; 118 uint32_t npe_aer_ce_mask = 0; 119 uint32_t npe_aer_suce_mask = PCIE_AER_SUCE_RCVD_MA; 120 121 struct bus_ops npe_bus_ops = { 122 BUSO_REV, 123 npe_bus_map, 124 NULL, 125 NULL, 126 NULL, 127 i_ddi_map_fault, 128 NULL, 129 ddi_dma_allochdl, 130 ddi_dma_freehdl, 131 ddi_dma_bindhdl, 132 ddi_dma_unbindhdl, 133 ddi_dma_flush, 134 ddi_dma_win, 135 ddi_dma_mctl, 136 npe_ctlops, 137 ddi_bus_prop_op, 138 npe_bus_get_eventcookie, 139 npe_bus_add_eventcall, 140 npe_bus_remove_eventcall, 141 npe_bus_post_event, 142 0, /* (*bus_intr_ctl)(); */ 143 0, /* (*bus_config)(); */ 144 0, /* (*bus_unconfig)(); */ 145 npe_fm_init, /* (*bus_fm_init)(); */ 146 NULL, /* (*bus_fm_fini)(); */ 147 NULL, /* (*bus_fm_access_enter)(); */ 148 NULL, /* (*bus_fm_access_exit)(); */ 149 NULL, /* (*bus_power)(); */ 150 npe_intr_ops, /* (*bus_intr_op)(); */ 151 pcie_hp_common_ops /* (*bus_hp_op)(); */ 152 }; 153 154 static int npe_open(dev_t *, int, int, cred_t *); 155 static int npe_close(dev_t, int, int, cred_t *); 156 static int npe_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 157 158 struct cb_ops npe_cb_ops = { 159 npe_open, /* open */ 160 npe_close, /* close */ 161 nodev, /* strategy */ 162 nodev, /* print */ 163 nodev, /* dump */ 164 nodev, /* read */ 165 nodev, /* write */ 166 npe_ioctl, /* ioctl */ 167 nodev, /* devmap */ 168 nodev, /* mmap */ 
169 nodev, /* segmap */ 170 nochpoll, /* poll */ 171 pcie_prop_op, /* cb_prop_op */ 172 NULL, /* streamtab */ 173 D_NEW | D_MP | D_HOTPLUG, /* Driver compatibility flag */ 174 CB_REV, /* rev */ 175 nodev, /* int (*cb_aread)() */ 176 nodev /* int (*cb_awrite)() */ 177 }; 178 179 180 /* 181 * Device Node Operation functions 182 */ 183 static int npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); 184 static int npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); 185 static int npe_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 186 187 struct dev_ops npe_ops = { 188 DEVO_REV, /* devo_rev */ 189 0, /* refcnt */ 190 npe_info, /* info */ 191 nulldev, /* identify */ 192 nulldev, /* probe */ 193 npe_attach, /* attach */ 194 npe_detach, /* detach */ 195 nulldev, /* reset */ 196 &npe_cb_ops, /* driver operations */ 197 &npe_bus_ops, /* bus operations */ 198 NULL, /* power */ 199 ddi_quiesce_not_needed, /* quiesce */ 200 }; 201 202 /* 203 * Internal routines in support of particular npe_ctlops. 204 */ 205 static int npe_removechild(dev_info_t *child); 206 static int npe_initchild(dev_info_t *child); 207 208 /* 209 * External support routine 210 */ 211 extern void npe_ck804_fix_aer_ptr(ddi_acc_handle_t cfg_hdl); 212 extern int npe_disable_empty_bridges_workaround(dev_info_t *child); 213 extern void npe_nvidia_error_workaround(ddi_acc_handle_t cfg_hdl); 214 extern void npe_intel_error_workaround(ddi_acc_handle_t cfg_hdl); 215 extern boolean_t npe_is_mmcfg_supported(dev_info_t *dip); 216 extern void npe_enable_htmsi_children(dev_info_t *dip); 217 extern int npe_save_htconfig_children(dev_info_t *dip); 218 extern int npe_restore_htconfig_children(dev_info_t *dip); 219 220 /* 221 * Module linkage information for the kernel. 
222 */ 223 static struct modldrv modldrv = { 224 &mod_driverops, /* Type of module */ 225 "Host to PCIe nexus driver", /* Name of module */ 226 &npe_ops, /* driver ops */ 227 }; 228 229 static struct modlinkage modlinkage = { 230 MODREV_1, 231 (void *)&modldrv, 232 NULL 233 }; 234 235 /* Save minimal state. */ 236 void *npe_statep; 237 238 int 239 _init(void) 240 { 241 int e; 242 243 /* 244 * Initialize per-pci bus soft state pointer. 245 */ 246 e = ddi_soft_state_init(&npe_statep, sizeof (pci_state_t), 1); 247 if (e != 0) 248 return (e); 249 250 if ((e = mod_install(&modlinkage)) != 0) 251 ddi_soft_state_fini(&npe_statep); 252 253 return (e); 254 } 255 256 257 int 258 _fini(void) 259 { 260 int rc; 261 262 rc = mod_remove(&modlinkage); 263 if (rc != 0) 264 return (rc); 265 266 ddi_soft_state_fini(&npe_statep); 267 return (rc); 268 } 269 270 271 int 272 _info(struct modinfo *modinfop) 273 { 274 return (mod_info(&modlinkage, modinfop)); 275 } 276 277 /*ARGSUSED*/ 278 static int 279 npe_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 280 { 281 minor_t minor = getminor((dev_t)arg); 282 int instance = PCI_MINOR_NUM_TO_INSTANCE(minor); 283 pci_state_t *pcip = ddi_get_soft_state(npe_statep, instance); 284 int ret = DDI_SUCCESS; 285 286 switch (cmd) { 287 case DDI_INFO_DEVT2INSTANCE: 288 *result = (void *)(intptr_t)instance; 289 break; 290 case DDI_INFO_DEVT2DEVINFO: 291 if (pcip == NULL) { 292 ret = DDI_FAILURE; 293 break; 294 } 295 296 *result = (void *)pcip->pci_dip; 297 break; 298 default: 299 ret = DDI_FAILURE; 300 break; 301 } 302 303 return (ret); 304 } 305 306 /* 307 * See big theory statement at the top of this file for more information about 308 * surprise removal events. 
309 */ 310 #define NPE_EVENT_TAG_HOT_REMOVAL 0 311 static ndi_event_definition_t npe_ndi_event_defs[1] = { 312 {NPE_EVENT_TAG_HOT_REMOVAL, DDI_DEVI_REMOVE_EVENT, EPL_KERNEL, 313 NDI_EVENT_POST_TO_ALL} 314 }; 315 316 static ndi_event_set_t npe_ndi_events = { 317 NDI_EVENTS_REV1, ARRAY_SIZE(npe_ndi_event_defs), npe_ndi_event_defs 318 }; 319 320 /*ARGSUSED*/ 321 static int 322 npe_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 323 { 324 int instance = ddi_get_instance(devi); 325 pci_state_t *pcip = NULL; 326 int ret; 327 328 if (cmd == DDI_RESUME) { 329 /* 330 * the system might still be able to resume even if this fails 331 */ 332 (void) npe_restore_htconfig_children(devi); 333 return (DDI_SUCCESS); 334 } 335 336 /* 337 * We must do this here in order to ensure that all top level devices 338 * get their HyperTransport MSI mapping regs programmed first. 339 * "Memory controller" and "hostbridge" class devices are leaf devices 340 * that may affect MSI translation functionality for devices 341 * connected to the same link/bus. 342 * 343 * This will also program HT MSI mapping registers on root buses 344 * devices (basically sitting on an HT bus) that are not dependent 345 * on the aforementioned HT devices for MSI translation. 
346 */ 347 npe_enable_htmsi_children(devi); 348 349 if (ddi_prop_update_string(DDI_DEV_T_NONE, devi, "device_type", 350 "pciex") != DDI_PROP_SUCCESS) { 351 cmn_err(CE_WARN, "npe: 'device_type' prop create failed"); 352 } 353 354 if (ddi_soft_state_zalloc(npe_statep, instance) == DDI_SUCCESS) 355 pcip = ddi_get_soft_state(npe_statep, instance); 356 357 if (pcip == NULL) 358 return (DDI_FAILURE); 359 360 pcip->pci_dip = devi; 361 pcip->pci_soft_state = PCI_SOFT_STATE_CLOSED; 362 363 if (pcie_init(devi, NULL) != DDI_SUCCESS) 364 goto fail1; 365 366 ret = ndi_event_alloc_hdl(pcip->pci_dip, NULL, &pcip->pci_ndi_event_hdl, 367 NDI_SLEEP); 368 if (ret == NDI_SUCCESS) { 369 ret = ndi_event_bind_set(pcip->pci_ndi_event_hdl, 370 &npe_ndi_events, NDI_SLEEP); 371 if (ret != NDI_SUCCESS) { 372 dev_err(pcip->pci_dip, CE_WARN, "npe: failed to bind " 373 "NDI event set (error=%d)", ret); 374 goto fail1; 375 } 376 } else { 377 dev_err(pcip->pci_dip, CE_WARN, "npe: failed to allocate " 378 "event handle (error=%d)", ret); 379 goto fail1; 380 } 381 382 /* Second arg: initialize for pci_express root nexus */ 383 if (pcitool_init(devi, B_TRUE) != DDI_SUCCESS) 384 goto fail2; 385 386 pcip->pci_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 387 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 388 ddi_fm_init(devi, &pcip->pci_fmcap, &pcip->pci_fm_ibc); 389 390 if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) { 391 ddi_fm_handler_register(devi, npe_fm_callback, NULL); 392 } 393 394 PCIE_DIP2PFD(devi) = kmem_zalloc(sizeof (pf_data_t), KM_SLEEP); 395 pcie_rc_init_pfd(devi, PCIE_DIP2PFD(devi)); 396 397 ddi_report_dev(devi); 398 pcie_fab_init_bus(devi, PCIE_BUS_FINAL); 399 400 return (DDI_SUCCESS); 401 402 fail2: 403 (void) pcie_uninit(devi); 404 fail1: 405 pcie_rc_fini_bus(devi); 406 ddi_soft_state_free(npe_statep, instance); 407 408 return (DDI_FAILURE); 409 } 410 411 /*ARGSUSED*/ 412 static int 413 npe_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 414 { 415 int instance = 
ddi_get_instance(devi); 416 pci_state_t *pcip; 417 int ret; 418 419 pcip = ddi_get_soft_state(npe_statep, ddi_get_instance(devi)); 420 421 switch (cmd) { 422 case DDI_DETACH: 423 424 /* 425 * Clean up event handling first, to ensure there are no 426 * oustanding callbacks registered. 427 */ 428 ret = ndi_event_unbind_set(pcip->pci_ndi_event_hdl, 429 &npe_ndi_events, NDI_SLEEP); 430 if (ret == NDI_SUCCESS) { 431 /* ndi_event_free_hdl always succeeds. */ 432 (void) ndi_event_free_hdl(pcip->pci_ndi_event_hdl); 433 } else { 434 /* 435 * The event set will only fail to unbind if there are 436 * outstanding callbacks registered for it, which 437 * probably means a child driver still has one 438 * registered and thus was not cleaned up properly 439 * before npe's detach routine was called. Consequently, 440 * we should fail the detach here. 441 */ 442 dev_err(pcip->pci_dip, CE_WARN, "npe: failed to " 443 "unbind NDI event set (error=%d)", ret); 444 return (DDI_FAILURE); 445 } 446 447 pcie_fab_fini_bus(devi, PCIE_BUS_INITIAL); 448 449 /* Uninitialize pcitool support. */ 450 pcitool_uninit(devi); 451 452 if (pcie_uninit(devi) != DDI_SUCCESS) 453 return (DDI_FAILURE); 454 455 if (pcip->pci_fmcap & DDI_FM_ERRCB_CAPABLE) 456 ddi_fm_handler_unregister(devi); 457 458 pcie_rc_fini_pfd(PCIE_DIP2PFD(devi)); 459 kmem_free(PCIE_DIP2PFD(devi), sizeof (pf_data_t)); 460 461 ddi_fm_fini(devi); 462 ddi_soft_state_free(npe_statep, instance); 463 464 return (DDI_SUCCESS); 465 466 case DDI_SUSPEND: 467 /* 468 * the system might still be able to suspend/resume even if 469 * this fails 470 */ 471 (void) npe_save_htconfig_children(devi); 472 return (DDI_SUCCESS); 473 default: 474 return (DDI_FAILURE); 475 } 476 } 477 478 /* 479 * Configure the access handle for standard configuration space 480 * access (see pci_fm_acc_setup for code that initializes the 481 * access-function pointers). 
482 */ 483 static int 484 npe_setup_std_pcicfg_acc(dev_info_t *rdip, ddi_map_req_t *mp, 485 ddi_acc_hdl_t *hp, off_t offset, off_t len) 486 { 487 int ret; 488 489 if ((ret = pci_fm_acc_setup(hp, offset, len)) == 490 DDI_SUCCESS) { 491 if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) && 492 mp->map_handlep->ah_acc.devacc_attr_access 493 != DDI_DEFAULT_ACC) { 494 ndi_fmc_insert(rdip, ACC_HANDLE, 495 (void *)mp->map_handlep, NULL); 496 } 497 } 498 return (ret); 499 } 500 501 static int 502 npe_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp, 503 off_t offset, off_t len, caddr_t *vaddrp) 504 { 505 int rnumber; 506 int space; 507 ddi_acc_impl_t *ap; 508 ddi_acc_hdl_t *hp; 509 ddi_map_req_t mr; 510 pci_regspec_t pci_reg; 511 pci_regspec_t *pci_rp; 512 struct regspec64 reg; 513 pci_acc_cfblk_t *cfp; 514 int retval; 515 int64_t *ecfginfo; 516 uint_t nelem; 517 uint64_t pci_rlength; 518 519 mr = *mp; /* Get private copy of request */ 520 mp = &mr; 521 522 /* 523 * check for register number 524 */ 525 switch (mp->map_type) { 526 case DDI_MT_REGSPEC: 527 pci_reg = *(pci_regspec_t *)(mp->map_obj.rp); 528 pci_rp = &pci_reg; 529 if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS) 530 return (DDI_FAILURE); 531 break; 532 case DDI_MT_RNUMBER: 533 rnumber = mp->map_obj.rnumber; 534 /* 535 * get ALL "reg" properties for dip, select the one of 536 * of interest. In x86, "assigned-addresses" property 537 * is identical to the "reg" property, so there is no 538 * need to cross check the two to determine the physical 539 * address of the registers. 540 * This routine still performs some validity checks to 541 * make sure that everything is okay. 542 */ 543 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip, 544 DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &nelem) != 545 DDI_PROP_SUCCESS) 546 return (DDI_FAILURE); 547 548 /* 549 * validate the register number. 
550 */ 551 nelem /= (sizeof (pci_regspec_t) / sizeof (int)); 552 if (rnumber >= nelem) { 553 ddi_prop_free(pci_rp); 554 return (DDI_FAILURE); 555 } 556 557 /* 558 * copy the required entry. 559 */ 560 pci_reg = pci_rp[rnumber]; 561 562 /* 563 * free the memory allocated by ddi_prop_lookup_int_array 564 */ 565 ddi_prop_free(pci_rp); 566 567 pci_rp = &pci_reg; 568 if (pci_common_get_reg_prop(rdip, pci_rp) != DDI_SUCCESS) 569 return (DDI_FAILURE); 570 mp->map_type = DDI_MT_REGSPEC; 571 break; 572 default: 573 return (DDI_ME_INVAL); 574 } 575 576 space = pci_rp->pci_phys_hi & PCI_REG_ADDR_M; 577 578 /* 579 * check for unmap and unlock of address space 580 */ 581 if ((mp->map_op == DDI_MO_UNMAP) || (mp->map_op == DDI_MO_UNLOCK)) { 582 switch (space) { 583 case PCI_ADDR_IO: 584 reg.regspec_bustype = 1; 585 break; 586 587 case PCI_ADDR_CONFIG: 588 /* 589 * If this is an unmap/unlock of a standard config 590 * space mapping (memory-mapped config space mappings 591 * would have the DDI_ACCATTR_CPU_VADDR bit set in the 592 * acc_attr), undo that setup here. 593 */ 594 if (NPE_IS_HANDLE_FOR_STDCFG_ACC(mp->map_handlep)) { 595 596 if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) && 597 mp->map_handlep->ah_acc.devacc_attr_access 598 != DDI_DEFAULT_ACC) { 599 ndi_fmc_remove(rdip, ACC_HANDLE, 600 (void *)mp->map_handlep); 601 } 602 return (DDI_SUCCESS); 603 } 604 605 pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE; 606 607 /* FALLTHROUGH */ 608 case PCI_ADDR_MEM64: 609 case PCI_ADDR_MEM32: 610 reg.regspec_bustype = 0; 611 break; 612 613 default: 614 return (DDI_FAILURE); 615 } 616 617 reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 | 618 (uint64_t)pci_rp->pci_phys_low; 619 reg.regspec_size = (uint64_t)pci_rp->pci_size_hi << 32 | 620 (uint64_t)pci_rp->pci_size_low; 621 622 /* 623 * Adjust offset and length 624 * A non-zero length means override the one in the regspec. 
625 */ 626 if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset)) 627 return (DDI_FAILURE); 628 reg.regspec_addr += offset; 629 if (len != 0) 630 reg.regspec_size = len; 631 632 mp->map_obj.rp = (struct regspec *)® 633 mp->map_flags |= DDI_MF_EXT_REGSPEC; 634 retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp); 635 if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) && 636 mp->map_handlep->ah_acc.devacc_attr_access != 637 DDI_DEFAULT_ACC) { 638 ndi_fmc_remove(rdip, ACC_HANDLE, 639 (void *)mp->map_handlep); 640 } 641 return (retval); 642 643 } 644 645 /* check for user mapping request - not legal for Config */ 646 if (mp->map_op == DDI_MO_MAP_HANDLE && space == PCI_ADDR_CONFIG) { 647 cmn_err(CE_NOTE, "npe: Config mapping request from user\n"); 648 return (DDI_FAILURE); 649 } 650 651 652 /* 653 * Note that pci_fm_acc_setup() is called to serve two purposes 654 * i) enable legacy PCI I/O style config space access 655 * ii) register with FMA 656 */ 657 if (space == PCI_ADDR_CONFIG) { 658 659 /* Can't map config space without a handle */ 660 hp = (ddi_acc_hdl_t *)mp->map_handlep; 661 if (hp == NULL) 662 return (DDI_FAILURE); 663 664 /* record the device address for future reference */ 665 cfp = (pci_acc_cfblk_t *)&hp->ah_bus_private; 666 cfp->c_busnum = PCI_REG_BUS_G(pci_rp->pci_phys_hi); 667 cfp->c_devnum = PCI_REG_DEV_G(pci_rp->pci_phys_hi); 668 cfp->c_funcnum = PCI_REG_FUNC_G(pci_rp->pci_phys_hi); 669 670 *vaddrp = (caddr_t)offset; 671 672 /* Check if MMCFG is supported */ 673 if (!npe_is_mmcfg_supported(rdip)) { 674 return (npe_setup_std_pcicfg_acc(rdip, mp, hp, 675 offset, len)); 676 } 677 678 679 if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, rdip, 0, 680 "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) { 681 682 if (nelem != 4 || 683 cfp->c_busnum < ecfginfo[2] || 684 cfp->c_busnum > ecfginfo[3]) { 685 /* 686 * Invalid property or Doesn't contain the 687 * requested bus; fall back to standard 688 * (I/O-based) config access. 
689 */ 690 ddi_prop_free(ecfginfo); 691 return (npe_setup_std_pcicfg_acc(rdip, mp, hp, 692 offset, len)); 693 } else { 694 pci_rp->pci_phys_low = ecfginfo[0]; 695 696 ddi_prop_free(ecfginfo); 697 698 pci_rp->pci_phys_low += ((cfp->c_busnum << 20) | 699 (cfp->c_devnum) << 15 | 700 (cfp->c_funcnum << 12)); 701 702 pci_rp->pci_size_low = PCIE_CONF_HDR_SIZE; 703 } 704 } else { 705 /* 706 * Couldn't find the MMCFG property -- fall back to 707 * standard config access 708 */ 709 return (npe_setup_std_pcicfg_acc(rdip, mp, hp, 710 offset, len)); 711 } 712 } 713 714 /* 715 * range check 716 */ 717 pci_rlength = (uint64_t)pci_rp->pci_size_low | 718 (uint64_t)pci_rp->pci_size_hi << 32; 719 if ((offset >= pci_rlength) || (len > pci_rlength) || 720 (offset + len > pci_rlength) || (offset + len < MAX(offset, len))) { 721 return (DDI_FAILURE); 722 } 723 724 /* 725 * convert the pci regsec into the generic regspec used by the 726 * parent root nexus driver. 727 */ 728 switch (space) { 729 case PCI_ADDR_IO: 730 reg.regspec_bustype = 1; 731 break; 732 case PCI_ADDR_CONFIG: 733 case PCI_ADDR_MEM64: 734 case PCI_ADDR_MEM32: 735 reg.regspec_bustype = 0; 736 break; 737 default: 738 return (DDI_FAILURE); 739 } 740 741 reg.regspec_addr = (uint64_t)pci_rp->pci_phys_mid << 32 | 742 (uint64_t)pci_rp->pci_phys_low; 743 reg.regspec_size = pci_rlength; 744 745 /* 746 * Adjust offset and length 747 * A non-zero length means override the one in the regspec. 748 */ 749 if (reg.regspec_addr + offset < MAX(reg.regspec_addr, offset)) 750 return (DDI_FAILURE); 751 reg.regspec_addr += offset; 752 if (len != 0) 753 reg.regspec_size = len; 754 755 756 mp->map_obj.rp = (struct regspec *)® 757 mp->map_flags |= DDI_MF_EXT_REGSPEC; 758 retval = ddi_map(dip, mp, (off_t)0, (off_t)0, vaddrp); 759 if (retval == DDI_SUCCESS) { 760 /* 761 * For config space gets force use of cautious access routines. 762 * These will handle default and protected mode accesses too. 
763 */ 764 if (space == PCI_ADDR_CONFIG) { 765 ap = (ddi_acc_impl_t *)mp->map_handlep; 766 ap->ahi_acc_attr &= ~DDI_ACCATTR_DIRECT; 767 ap->ahi_acc_attr |= DDI_ACCATTR_CONFIG_SPACE; 768 ap->ahi_get8 = i_ddi_caut_get8; 769 ap->ahi_get16 = i_ddi_caut_get16; 770 ap->ahi_get32 = i_ddi_caut_get32; 771 ap->ahi_get64 = i_ddi_caut_get64; 772 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 773 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 774 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 775 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 776 } 777 if (DDI_FM_ACC_ERR_CAP(ddi_fm_capable(rdip)) && 778 mp->map_handlep->ah_acc.devacc_attr_access != 779 DDI_DEFAULT_ACC) { 780 ndi_fmc_insert(rdip, ACC_HANDLE, 781 (void *)mp->map_handlep, NULL); 782 } 783 } 784 return (retval); 785 } 786 787 788 789 /*ARGSUSED*/ 790 static int 791 npe_ctlops(dev_info_t *dip, dev_info_t *rdip, 792 ddi_ctl_enum_t ctlop, void *arg, void *result) 793 { 794 int totreg; 795 uint_t reglen; 796 pci_regspec_t *drv_regp; 797 struct attachspec *asp; 798 struct detachspec *dsp; 799 pci_state_t *pci_p = ddi_get_soft_state(npe_statep, 800 ddi_get_instance(dip)); 801 802 switch (ctlop) { 803 case DDI_CTLOPS_REPORTDEV: 804 if (rdip == (dev_info_t *)0) 805 return (DDI_FAILURE); 806 cmn_err(CE_CONT, "?PCI Express-device: %s@%s, %s%d\n", 807 ddi_node_name(rdip), ddi_get_name_addr(rdip), 808 ddi_driver_name(rdip), ddi_get_instance(rdip)); 809 return (DDI_SUCCESS); 810 811 case DDI_CTLOPS_INITCHILD: 812 return (npe_initchild((dev_info_t *)arg)); 813 814 case DDI_CTLOPS_UNINITCHILD: 815 return (npe_removechild((dev_info_t *)arg)); 816 817 case DDI_CTLOPS_SIDDEV: 818 return (DDI_SUCCESS); 819 820 case DDI_CTLOPS_REGSIZE: 821 case DDI_CTLOPS_NREGS: 822 if (rdip == (dev_info_t *)0) 823 return (DDI_FAILURE); 824 825 *(int *)result = 0; 826 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, rdip, 827 DDI_PROP_DONTPASS, "reg", (int **)&drv_regp, 828 ®len) != DDI_PROP_SUCCESS) { 829 return (DDI_FAILURE); 830 } 831 832 totreg = (reglen * sizeof (int)) / 
sizeof (pci_regspec_t); 833 if (ctlop == DDI_CTLOPS_NREGS) 834 *(int *)result = totreg; 835 else if (ctlop == DDI_CTLOPS_REGSIZE) { 836 uint64_t val; 837 int rn; 838 839 rn = *(int *)arg; 840 if (rn >= totreg) { 841 ddi_prop_free(drv_regp); 842 return (DDI_FAILURE); 843 } 844 val = drv_regp[rn].pci_size_low | 845 (uint64_t)drv_regp[rn].pci_size_hi << 32; 846 if (val > OFF_MAX) { 847 int ce = CE_NOTE; 848 #ifdef DEBUG 849 ce = CE_WARN; 850 #endif 851 dev_err(rdip, ce, "failed to get register " 852 "size, value larger than OFF_MAX: 0x%" 853 PRIx64 "\n", val); 854 return (DDI_FAILURE); 855 } 856 *(off_t *)result = (off_t)val; 857 } 858 ddi_prop_free(drv_regp); 859 860 return (DDI_SUCCESS); 861 862 case DDI_CTLOPS_POWER: 863 { 864 power_req_t *reqp = (power_req_t *)arg; 865 /* 866 * We currently understand reporting of PCI_PM_IDLESPEED 867 * capability. Everything else is passed up. 868 */ 869 if ((reqp->request_type == PMR_REPORT_PMCAP) && 870 (reqp->req.report_pmcap_req.cap == PCI_PM_IDLESPEED)) 871 return (DDI_SUCCESS); 872 873 break; 874 } 875 876 case DDI_CTLOPS_PEEK: 877 case DDI_CTLOPS_POKE: 878 return (pci_common_peekpoke(dip, rdip, ctlop, arg, result)); 879 880 /* X86 systems support PME wakeup from suspended state */ 881 case DDI_CTLOPS_ATTACH: 882 if (!pcie_is_child(dip, rdip)) 883 return (DDI_SUCCESS); 884 885 asp = (struct attachspec *)arg; 886 if ((asp->when == DDI_POST) && (asp->result == DDI_SUCCESS)) { 887 pf_init(rdip, (void *)pci_p->pci_fm_ibc, asp->cmd); 888 (void) pcie_postattach_child(rdip); 889 } 890 891 /* only do this for immediate children */ 892 if (asp->cmd == DDI_RESUME && asp->when == DDI_PRE && 893 ddi_get_parent(rdip) == dip) 894 if (pci_pre_resume(rdip) != DDI_SUCCESS) { 895 /* Not good, better stop now. 
*/ 896 cmn_err(CE_PANIC, 897 "Couldn't pre-resume device %p", 898 (void *) dip); 899 /* NOTREACHED */ 900 } 901 902 return (DDI_SUCCESS); 903 904 case DDI_CTLOPS_DETACH: 905 if (!pcie_is_child(dip, rdip)) 906 return (DDI_SUCCESS); 907 908 dsp = (struct detachspec *)arg; 909 910 if (dsp->when == DDI_PRE) 911 pf_fini(rdip, dsp->cmd); 912 913 /* only do this for immediate children */ 914 if (dsp->cmd == DDI_SUSPEND && dsp->when == DDI_POST && 915 ddi_get_parent(rdip) == dip) 916 if (pci_post_suspend(rdip) != DDI_SUCCESS) 917 return (DDI_FAILURE); 918 919 return (DDI_SUCCESS); 920 921 default: 922 break; 923 } 924 925 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 926 927 } 928 929 930 /* 931 * npe_intr_ops 932 */ 933 static int 934 npe_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op, 935 ddi_intr_handle_impl_t *hdlp, void *result) 936 { 937 return (pci_common_intr_ops(pdip, rdip, intr_op, hdlp, result)); 938 } 939 940 941 static int 942 npe_initchild(dev_info_t *child) 943 { 944 char name[80]; 945 pcie_bus_t *bus_p; 946 uint32_t regs; 947 ddi_acc_handle_t cfg_hdl; 948 949 /* 950 * Do not bind drivers to empty bridges. 951 * Fail above, if the bridge is found to be hotplug capable 952 */ 953 if (npe_disable_empty_bridges_workaround(child) == 1) 954 return (DDI_FAILURE); 955 956 if (pci_common_name_child(child, name, 80) != DDI_SUCCESS) 957 return (DDI_FAILURE); 958 959 ddi_set_name_addr(child, name); 960 961 /* 962 * Pseudo nodes indicate a prototype node with per-instance 963 * properties to be merged into the real h/w device node. 964 * The interpretation of the unit-address is DD[,F] 965 * where DD is the device id and F is the function. 966 */ 967 if (ndi_dev_is_persistent_node(child) == 0) { 968 extern int pci_allow_pseudo_children; 969 970 ddi_set_parent_data(child, NULL); 971 972 /* 973 * Try to merge the properties from this prototype 974 * node into real h/w nodes. 
975 */ 976 if (ndi_merge_node(child, pci_common_name_child) == 977 DDI_SUCCESS) { 978 /* 979 * Merged ok - return failure to remove the node. 980 */ 981 ddi_set_name_addr(child, NULL); 982 return (DDI_FAILURE); 983 } 984 985 /* workaround for DDIVS to run under PCI Express */ 986 if (pci_allow_pseudo_children) { 987 /* 988 * If the "interrupts" property doesn't exist, 989 * this must be the ddivs no-intr case, and it returns 990 * DDI_SUCCESS instead of DDI_FAILURE. 991 */ 992 if (ddi_prop_get_int(DDI_DEV_T_ANY, child, 993 DDI_PROP_DONTPASS, "interrupts", -1) == -1) 994 return (DDI_SUCCESS); 995 /* 996 * Create the ddi_parent_private_data for a pseudo 997 * child. 998 */ 999 pci_common_set_parent_private_data(child); 1000 return (DDI_SUCCESS); 1001 } 1002 1003 /* 1004 * The child was not merged into a h/w node, 1005 * but there's not much we can do with it other 1006 * than return failure to cause the node to be removed. 1007 */ 1008 cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged", 1009 ddi_get_name(child), ddi_get_name_addr(child), 1010 ddi_get_name(child)); 1011 ddi_set_name_addr(child, NULL); 1012 return (DDI_NOT_WELL_FORMED); 1013 } 1014 1015 if (ddi_prop_get_int(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS, 1016 "interrupts", -1) != -1) 1017 pci_common_set_parent_private_data(child); 1018 else 1019 ddi_set_parent_data(child, NULL); 1020 1021 /* Disable certain errors on PCIe drivers for x86 platforms */ 1022 regs = pcie_get_aer_uce_mask() | npe_aer_uce_mask; 1023 pcie_set_aer_uce_mask(regs); 1024 regs = pcie_get_aer_ce_mask() | npe_aer_ce_mask; 1025 pcie_set_aer_ce_mask(regs); 1026 regs = pcie_get_aer_suce_mask() | npe_aer_suce_mask; 1027 pcie_set_aer_suce_mask(regs); 1028 1029 /* 1030 * If URs are disabled, mask SERRs as well, otherwise the system will 1031 * still be notified of URs 1032 */ 1033 if (npe_aer_uce_mask & PCIE_AER_UCE_UR) 1034 pcie_set_serr_mask(1); 1035 1036 if (pci_config_setup(child, &cfg_hdl) == DDI_SUCCESS) { 1037 
npe_ck804_fix_aer_ptr(cfg_hdl); 1038 npe_nvidia_error_workaround(cfg_hdl); 1039 npe_intel_error_workaround(cfg_hdl); 1040 pci_config_teardown(&cfg_hdl); 1041 } 1042 1043 bus_p = PCIE_DIP2BUS(child); 1044 if (bus_p) { 1045 uint16_t device_id = (uint16_t)(bus_p->bus_dev_ven_id >> 16); 1046 uint16_t vendor_id = (uint16_t)(bus_p->bus_dev_ven_id & 0xFFFF); 1047 uint16_t rev_id = bus_p->bus_rev_id; 1048 1049 /* Disable AER for certain NVIDIA Chipsets */ 1050 if ((vendor_id == NVIDIA_VENDOR_ID) && 1051 (device_id == NVIDIA_CK804_DEVICE_ID) && 1052 (rev_id < NVIDIA_CK804_AER_VALID_REVID)) 1053 bus_p->bus_aer_off = 0; 1054 1055 pcie_init_dom(child); 1056 (void) pcie_initchild(child); 1057 } 1058 1059 return (DDI_SUCCESS); 1060 } 1061 1062 1063 static int 1064 npe_removechild(dev_info_t *dip) 1065 { 1066 pcie_uninitchild(dip); 1067 1068 ddi_set_name_addr(dip, NULL); 1069 1070 /* 1071 * Strip the node to properly convert it back to prototype form 1072 */ 1073 ddi_remove_minor_node(dip, NULL); 1074 1075 ddi_prop_remove_all(dip); 1076 1077 return (DDI_SUCCESS); 1078 } 1079 1080 static int 1081 npe_open(dev_t *devp, int flags, int otyp, cred_t *credp) 1082 { 1083 minor_t minor = getminor(*devp); 1084 int instance = PCI_MINOR_NUM_TO_INSTANCE(minor); 1085 pci_state_t *pci_p = ddi_get_soft_state(npe_statep, instance); 1086 int rv; 1087 1088 /* 1089 * Make sure the open is for the right file type. 
1090 */ 1091 if (otyp != OTYP_CHR) 1092 return (EINVAL); 1093 1094 if (pci_p == NULL) 1095 return (ENXIO); 1096 1097 mutex_enter(&pci_p->pci_mutex); 1098 switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) { 1099 case PCI_TOOL_REG_MINOR_NUM: 1100 case PCI_TOOL_INTR_MINOR_NUM: 1101 break; 1102 default: 1103 /* Handle devctl ioctls */ 1104 rv = pcie_open(pci_p->pci_dip, devp, flags, otyp, credp); 1105 mutex_exit(&pci_p->pci_mutex); 1106 return (rv); 1107 } 1108 1109 /* Handle pcitool ioctls */ 1110 if (flags & FEXCL) { 1111 if (pci_p->pci_soft_state != PCI_SOFT_STATE_CLOSED) { 1112 mutex_exit(&pci_p->pci_mutex); 1113 cmn_err(CE_NOTE, "npe_open: busy"); 1114 return (EBUSY); 1115 } 1116 pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN_EXCL; 1117 } else { 1118 if (pci_p->pci_soft_state == PCI_SOFT_STATE_OPEN_EXCL) { 1119 mutex_exit(&pci_p->pci_mutex); 1120 cmn_err(CE_NOTE, "npe_open: busy"); 1121 return (EBUSY); 1122 } 1123 pci_p->pci_soft_state = PCI_SOFT_STATE_OPEN; 1124 } 1125 mutex_exit(&pci_p->pci_mutex); 1126 1127 return (0); 1128 } 1129 1130 static int 1131 npe_close(dev_t dev, int flags, int otyp, cred_t *credp) 1132 { 1133 minor_t minor = getminor(dev); 1134 int instance = PCI_MINOR_NUM_TO_INSTANCE(minor); 1135 pci_state_t *pci_p = ddi_get_soft_state(npe_statep, instance); 1136 int rv; 1137 1138 if (pci_p == NULL) 1139 return (ENXIO); 1140 1141 mutex_enter(&pci_p->pci_mutex); 1142 1143 switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) { 1144 case PCI_TOOL_REG_MINOR_NUM: 1145 case PCI_TOOL_INTR_MINOR_NUM: 1146 break; 1147 default: 1148 /* Handle devctl ioctls */ 1149 rv = pcie_close(pci_p->pci_dip, dev, flags, otyp, credp); 1150 mutex_exit(&pci_p->pci_mutex); 1151 return (rv); 1152 } 1153 1154 /* Handle pcitool ioctls */ 1155 pci_p->pci_soft_state = PCI_SOFT_STATE_CLOSED; 1156 mutex_exit(&pci_p->pci_mutex); 1157 return (0); 1158 } 1159 1160 static int 1161 npe_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1162 { 1163 minor_t minor = getminor(dev); 
1164 int instance = PCI_MINOR_NUM_TO_INSTANCE(minor); 1165 pci_state_t *pci_p = ddi_get_soft_state(npe_statep, instance); 1166 int ret = ENOTTY; 1167 1168 if (pci_p == NULL) 1169 return (ENXIO); 1170 1171 switch (PCI_MINOR_NUM_TO_PCI_DEVNUM(minor)) { 1172 case PCI_TOOL_REG_MINOR_NUM: 1173 case PCI_TOOL_INTR_MINOR_NUM: 1174 /* To handle pcitool related ioctls */ 1175 ret = pci_common_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, 1176 credp, rvalp); 1177 break; 1178 default: 1179 /* To handle devctl and hotplug related ioctls */ 1180 ret = pcie_ioctl(pci_p->pci_dip, dev, cmd, arg, mode, credp, 1181 rvalp); 1182 break; 1183 } 1184 1185 return (ret); 1186 } 1187 1188 /*ARGSUSED*/ 1189 static int 1190 npe_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap, 1191 ddi_iblock_cookie_t *ibc) 1192 { 1193 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1194 ddi_get_instance(dip)); 1195 1196 ASSERT(ibc != NULL); 1197 *ibc = pcip->pci_fm_ibc; 1198 1199 return (pcip->pci_fmcap); 1200 } 1201 1202 static int 1203 npe_bus_get_eventcookie(dev_info_t *dip, dev_info_t *rdip, char *eventname, 1204 ddi_eventcookie_t *cookiep) 1205 { 1206 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1207 ddi_get_instance(dip)); 1208 1209 return (ndi_event_retrieve_cookie(pcip->pci_ndi_event_hdl, rdip, 1210 eventname, cookiep, NDI_EVENT_NOPASS)); 1211 } 1212 1213 static int 1214 npe_bus_add_eventcall(dev_info_t *dip, dev_info_t *rdip, 1215 ddi_eventcookie_t cookie, void (*callback)(dev_info_t *dip, 1216 ddi_eventcookie_t cookie, void *arg, void *bus_impldata), 1217 void *arg, ddi_callback_id_t *cb_id) 1218 { 1219 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1220 ddi_get_instance(dip)); 1221 1222 return (ndi_event_add_callback(pcip->pci_ndi_event_hdl, rdip, cookie, 1223 callback, arg, NDI_SLEEP, cb_id)); 1224 } 1225 1226 static int 1227 npe_bus_remove_eventcall(dev_info_t *dip, ddi_callback_id_t cb_id) 1228 { 1229 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1230 ddi_get_instance(dip)); 
1231 return (ndi_event_remove_callback(pcip->pci_ndi_event_hdl, cb_id)); 1232 } 1233 1234 static int 1235 npe_bus_post_event(dev_info_t *dip, dev_info_t *rdip, 1236 ddi_eventcookie_t cookie, void *impl_data) 1237 { 1238 pci_state_t *pcip = ddi_get_soft_state(npe_statep, 1239 ddi_get_instance(dip)); 1240 return (ndi_event_do_callback(pcip->pci_ndi_event_hdl, rdip, cookie, 1241 impl_data)); 1242 1243 } 1244 1245 /*ARGSUSED*/ 1246 static int 1247 npe_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *no_used) 1248 { 1249 /* 1250 * On current x86 systems, npe's callback does not get called for failed 1251 * loads. If in the future this feature is used, the fault PA should be 1252 * logged in the derr->fme_bus_specific field. The appropriate PCIe 1253 * error handling code should be called and needs to be coordinated with 1254 * safe access handling. 1255 */ 1256 1257 return (DDI_FM_OK); 1258 } 1259