1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2019 Joyent, Inc. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/ddi.h> 29 #include <sys/kmem.h> 30 #include <sys/sysmacros.h> 31 #include <sys/sunddi.h> 32 #include <sys/sunpm.h> 33 #include <sys/epm.h> 34 #include <sys/sunndi.h> 35 #include <sys/ddi_impldefs.h> 36 #include <sys/ddi_implfuncs.h> 37 #include <sys/pcie.h> 38 #include <sys/pcie_impl.h> 39 #include <sys/promif.h> /* prom_printf */ 40 #include <sys/pcie_pwr.h> 41 42 /* 43 * This file implements the power management functionality for 44 * pci express switch and pci express-to-pci/pci-x bridge. All the 45 * code in this file is generic and is not specific to a particular chip. 46 * The algorithm, which decides when to go to a lower power is explained 47 * below: 48 * 49 * 1. Initially when no children are attached, the driver is idle from 50 * PM framework point of view ( PM idle/PM busy). 51 * 52 * 2. Driver is PM busy if either a reference count called pwr_hold is 53 * greater than zero or driver is already at the lowest possible power 54 * level. The lowest possible power level for the driver is equal to the 55 * highest power level among its children. The PM busy condition is 56 * indicated by PCIE_PM_BUSY bit. At any point, only one pm_busy_component 57 * call is made for a nexus driver instance. 58 * 59 * 3. Driver is PM idle if the pwr_hold is zero and the lowest 60 * possible power level is less than the driver's current power level. 61 * At any point, only one pm_idle_component call is made for a nexus 62 * driver instance. 63 * 64 * 4. For any events like child attach, it increments pwr_hold and marks 65 * itslef busy, if it is not already done so. This temporary hold is 66 * removed when the event is complete. 67 * 68 * 5. Any child's power change requires the parent (this driver) to be 69 * full power. So it raises its power and increments pwr_hold. It also 70 * marks itself temporarily busy, if it is not already done. This hold 71 * is removed when the child power change is complete. 72 * 73 * 6. After each child power change, it evaluates what is the lowest 74 * possible power level. If the lowest possible power level is less than 75 * the current power level and pwr_hold is zero, then it marks itself 76 * idle. The lowest power level is equal or greater than the highest level 77 * among the children. It keeps track of children's power level by 78 * using counters. 79 * 80 * 7. Any code e.g., which is accessing the driver's own registers should 81 * place a temporary hold using pcie_pm_hold. 82 */ 83 84 static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new); 85 static void pwr_update_counters(int *countersp, int olevel, int nlevel); 86 static int pwr_level_allowed(pcie_pwr_t *pwr_p); 87 static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, 88 pcie_pwr_t *pwr_p); 89 static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, 90 pcie_pwr_t *pwr_p); 91 static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p); 92 static boolean_t pcie_is_pcie(dev_info_t *dip); 93 #ifdef DEBUG 94 static char *pcie_decode_pwr_op(pm_bus_power_op_t op); 95 #else 96 #define pcie_decode_pwr_op 97 #endif 98 99 /* 100 * power entry point. 101 * 102 * This function decides whether the PM request is honorable. 103 * If yes, it then does what's necessary for switch or 104 * bridge to change its power. 105 */ 106 /* ARGSUSED */ 107 int 108 pcie_power(dev_info_t *dip, int component, int level) 109 { 110 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip); 111 int *counters = pwr_p->pwr_counters; 112 int pmcaps = pwr_p->pwr_pmcaps; 113 int ret = DDI_FAILURE; 114 115 #if defined(__i386) || defined(__amd64) 116 if (dip) 117 return (DDI_SUCCESS); 118 #endif /* defined(__i386) || defined(__amd64) */ 119 120 ASSERT(level != PM_LEVEL_UNKNOWN); 121 /* PM should not asking for a level, which is unsupported */ 122 ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 || 123 (level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) || 124 (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2))); 125 126 mutex_enter(&pwr_p->pwr_lock); 127 PCIE_DBG("%s(%d): pcie_power: change from %d to %d\n", 128 ddi_driver_name(dip), ddi_get_instance(dip), pwr_p->pwr_func_lvl, 129 level); 130 if (pwr_p->pwr_func_lvl == level) { 131 PCIE_DBG("%s(%d): pcie_power: already at %d\n", 132 ddi_driver_name(dip), ddi_get_instance(dip), level); 133 ret = DDI_SUCCESS; 134 goto pcie_pwr_done; 135 } 136 137 if (level < pwr_p->pwr_func_lvl) { 138 /* 139 * Going to lower power. Reject this if we are either busy 140 * or there is a hold. 141 */ 142 if (pwr_p->pwr_flags & PCIE_PM_BUSY) { 143 PCIE_DBG("%s(%d): pcie_power: rejecting change to %d " 144 "as busy\n", ddi_driver_name(dip), 145 ddi_get_instance(dip), level); 146 goto pcie_pwr_done; 147 } 148 149 /* 150 * Now we know that we are neither busy nor there is a hold. 151 * At this point none of the children should be at full power. 152 * Reject the request if level reqested is lower than the level 153 * possible. 154 */ 155 ASSERT(!counters[PCIE_D0_INDEX] && 156 !counters[PCIE_UNKNOWN_INDEX]); 157 if (level < pwr_level_allowed(pwr_p)) { 158 PCIE_DBG("%s(%d): pcie_power: rejecting level %d as" 159 " %d is the lowest possible\n", 160 ddi_driver_name(dip), ddi_get_instance(dip), level, 161 pwr_level_allowed(pwr_p)); 162 goto pcie_pwr_done; 163 } 164 } 165 166 if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) { 167 PCIE_DBG("%s(%d): pcie_power: attempt to change to %d " 168 " failed \n", ddi_driver_name(dip), ddi_get_instance(dip), 169 level); 170 goto pcie_pwr_done; 171 } 172 pwr_p->pwr_func_lvl = level; 173 PCIE_DBG("%s(%d): pcie_power: level changed to %d \n", 174 ddi_driver_name(dip), ddi_get_instance(dip), level); 175 ret = DDI_SUCCESS; 176 177 pcie_pwr_done: 178 mutex_exit(&pwr_p->pwr_lock); 179 return (ret); 180 } 181 182 /* 183 * Called by pcie_power() only. Caller holds the pwr_lock. 184 * 185 * dip - dev_info pointer 186 * pwr_p - pm info for the node. 187 * new - new level 188 */ 189 static int 190 pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new) 191 { 192 uint16_t pmcsr; 193 194 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 195 ASSERT(new != pwr_p->pwr_func_lvl); 196 pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset); 197 pmcsr &= ~PCI_PMCSR_STATE_MASK; 198 switch (new) { 199 case PM_LEVEL_D0: 200 pmcsr |= PCI_PMCSR_D0; 201 break; 202 203 case PM_LEVEL_D1: 204 pmcsr |= PCI_PMCSR_D1; 205 break; 206 207 case PM_LEVEL_D2: 208 pmcsr |= PCI_PMCSR_D2; 209 break; 210 211 case PM_LEVEL_D3: 212 pmcsr |= PCI_PMCSR_D3HOT; 213 break; 214 215 default: 216 ASSERT(0); 217 break; 218 } 219 /* Save config space, if going to D3 */ 220 if (new == PM_LEVEL_D3) { 221 PCIE_DBG("%s(%d): pwr_change: saving config space regs\n", 222 ddi_driver_name(dip), ddi_get_instance(dip)); 223 if (pci_save_config_regs(dip) != DDI_SUCCESS) { 224 PCIE_DBG("%s(%d): pcie_pwr_change: failed to save " 225 "config space regs\n", ddi_driver_name(dip), 226 ddi_get_instance(dip)); 227 return (DDI_FAILURE); 228 } 229 } 230 231 pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr); 232 233 /* 234 * TBD: Taken from pci_pci driver. Is this required? 235 * No bus transactions should occur without waiting for 236 * settle time specified in PCI PM spec rev 2.1 sec 5.6.1 237 * To make things simple, just use the max time specified for 238 * all state transitions. 239 */ 240 delay(drv_usectohz(PCI_CLK_SETTLE_TIME)); 241 242 /* 243 * Restore config space if coming out of D3 244 */ 245 if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) { 246 PCIE_DBG("%s(%d): pcie_pwr_change: restoring config space\n", 247 ddi_driver_name(dip), ddi_get_instance(dip)); 248 if (pci_restore_config_regs(dip) != DDI_SUCCESS) { 249 PCIE_DBG("%s(%d): pcie_pwr_change: failed to restore " 250 "config space regs\n", ddi_driver_name(dip), 251 ddi_get_instance(dip)); 252 return (DDI_FAILURE); 253 } 254 } 255 return (DDI_SUCCESS); 256 } 257 258 /* 259 * bus_ctlops.bus_power function. 260 * 261 * This function handles PRE_ POST_ change notifications, sent by 262 * PM framework related to child's power level change. It marks itself 263 * idle or busy based on the children's power level. 264 */ 265 int 266 pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, 267 void *arg, void *result) 268 { 269 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip); 270 int *counters = pwr_p->pwr_counters; /* nexus counters */ 271 int *child_counters; /* per child dip counters */ 272 pm_bp_child_pwrchg_t *bpc; 273 pm_bp_has_changed_t *bphc; 274 dev_info_t *cdip; 275 int new_level; 276 int old_level; 277 int rv = DDI_SUCCESS; 278 int level_allowed, comp; 279 280 #if defined(__i386) || defined(__amd64) 281 if (dip) 282 return (DDI_SUCCESS); 283 #endif /* defined(__i386) || defined(__amd64) */ 284 285 switch (op) { 286 case BUS_POWER_PRE_NOTIFICATION: 287 case BUS_POWER_POST_NOTIFICATION: 288 bpc = (pm_bp_child_pwrchg_t *)arg; 289 cdip = bpc->bpc_dip; 290 new_level = bpc->bpc_nlevel; 291 old_level = bpc->bpc_olevel; 292 comp = bpc->bpc_comp; 293 break; 294 295 case BUS_POWER_HAS_CHANGED: 296 bphc = (pm_bp_has_changed_t *)arg; 297 cdip = bphc->bphc_dip; 298 new_level = bphc->bphc_nlevel; 299 old_level = bphc->bphc_olevel; 300 comp = bphc->bphc_comp; 301 break; 302 303 default: 304 break; 305 306 } 307 308 ASSERT(pwr_p); 309 mutex_enter(&pwr_p->pwr_lock); 310 switch (op) { 311 case BUS_POWER_PRE_NOTIFICATION: 312 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n", 313 ddi_driver_name(dip), ddi_get_instance(dip), 314 ddi_driver_name(cdip), ddi_get_instance(cdip), 315 pcie_decode_pwr_op(op), old_level, new_level); 316 /* 317 * If the nexus doesn't want the child to go into 318 * non-D0 state, mark the child busy. This way PM 319 * framework will never try to lower the child's power. 320 * In case of pm_lower_power, marking busy won't help. 321 * So we need to specifically reject the attempt to 322 * go to non-D0 state. 323 */ 324 if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) { 325 if (!PCIE_IS_COMPS_COUNTED(cdip)) { 326 PCIE_DBG("%s(%d): pcie_bus_power: marking " 327 "child busy to disable pm \n", 328 ddi_driver_name(dip), 329 ddi_get_instance(dip)); 330 (void) pm_busy_component(cdip, 0); 331 } 332 if (new_level < PM_LEVEL_D0 && !comp) { 333 PCIE_DBG("%s(%d): pcie_bus_power: rejecting " 334 "child's attempt to go to %d\n", 335 ddi_driver_name(dip), ddi_get_instance(dip), 336 new_level); 337 rv = DDI_FAILURE; 338 } 339 } 340 mutex_exit(&pwr_p->pwr_lock); 341 if (rv == DDI_SUCCESS) 342 rv = pcie_pm_hold(dip); 343 return (rv); 344 345 case BUS_POWER_HAS_CHANGED: 346 case BUS_POWER_POST_NOTIFICATION: 347 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n", 348 ddi_driver_name(dip), ddi_get_instance(dip), 349 ddi_driver_name(cdip), ddi_get_instance(cdip), 350 pcie_decode_pwr_op(op), old_level, new_level); 351 /* 352 * Child device power changed 353 * If pm components of this child aren't accounted for 354 * then add the components to the counters. This can't 355 * be done in POST_ATTACH ctlop as pm info isn't created 356 * by then. Also because a driver can make a pm call during 357 * the attach. 358 */ 359 if (!PCIE_IS_COMPS_COUNTED(cdip)) { 360 (void) pcie_pm_add_child(dip, cdip); 361 if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) && 362 (op == BUS_POWER_HAS_CHANGED)) { 363 PCIE_DBG("%s(%d): pcie_bus_power: marking " 364 "child busy to disable pm \n", 365 ddi_driver_name(dip), 366 ddi_get_instance(dip)); 367 (void) pm_busy_component(cdip, 0); 368 /* 369 * If the driver has already changed to lower 370 * power(pm_power_has_changed) on its own, 371 * there is nothing we can do other than 372 * logging the warning message on the console. 373 */ 374 if (new_level < PM_LEVEL_D0) 375 cmn_err(CE_WARN, "!Downstream device " 376 "%s@%d went to non-D0 state: " 377 "possible loss of link\n", 378 ddi_driver_name(cdip), 379 ddi_get_instance(cdip)); 380 } 381 } 382 383 384 /* 385 * If it is POST and device PM is supported, release the 386 * hold done in PRE. 387 */ 388 if (op == BUS_POWER_POST_NOTIFICATION && 389 PCIE_SUPPORTS_DEVICE_PM(dip)) { 390 pcie_pm_subrelease(dip, pwr_p); 391 } 392 393 if (*((int *)result) == DDI_FAILURE) { 394 PCIE_DBG("%s(%d): pcie_bus_power: change for %s%d " 395 "failed\n", ddi_driver_name(dip), 396 ddi_get_instance(dip), ddi_driver_name(cdip), 397 ddi_get_instance(cdip)); 398 break; 399 } 400 /* Modify counters appropriately */ 401 pwr_update_counters(counters, old_level, new_level); 402 403 child_counters = PCIE_CHILD_COUNTERS(cdip); 404 pwr_update_counters(child_counters, old_level, new_level); 405 406 /* If no device PM, return */ 407 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) 408 break; 409 410 level_allowed = pwr_level_allowed(pwr_p); 411 /* 412 * Check conditions for marking busy 413 * Check the flag to set this busy only once for multiple 414 * busy conditions. Mark busy if our current lowest possible 415 * is equal or greater to the current level. 416 */ 417 if (level_allowed >= pwr_p->pwr_func_lvl && 418 !(pwr_p->pwr_flags & PCIE_PM_BUSY)) { 419 PCIE_DBG("%s(%d): pcie_bus_power: marking busy\n", 420 ddi_driver_name(dip), ddi_get_instance(dip)); 421 (void) pm_busy_component(dip, 0); 422 pwr_p->pwr_flags |= PCIE_PM_BUSY; 423 break; 424 } 425 /* 426 * Check conditions for marking idle. 427 * If our lowest possible level is less than our current 428 * level mark idle. Mark idle only if it is not already done. 429 */ 430 if ((level_allowed < pwr_p->pwr_func_lvl) && 431 (pwr_p->pwr_hold == 0) && 432 (pwr_p->pwr_flags & PCIE_PM_BUSY)) { 433 /* 434 * For pci express, we should check here whether 435 * the link is in L1 state or not. 436 */ 437 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n", 438 ddi_driver_name(dip), ddi_get_instance(dip)); 439 (void) pm_idle_component(dip, 0); 440 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 441 break; 442 } 443 break; 444 445 default: 446 mutex_exit(&pwr_p->pwr_lock); 447 return (pm_busop_bus_power(dip, impl_arg, op, arg, result)); 448 } 449 mutex_exit(&pwr_p->pwr_lock); 450 return (rv); 451 } 452 453 /* 454 * Decrement the count of children at olevel by one and increment 455 * count of children at nlevel by one. 456 */ 457 static void 458 pwr_update_counters(int *countersp, int olevel, int nlevel) 459 { 460 uint32_t index; 461 462 ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0); 463 ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0); 464 465 index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel); 466 countersp[index]--; 467 index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel); 468 countersp[index]++; 469 } 470 471 /* 472 * Returns the lowest possible power level allowed for nexus 473 * based on children's power level. Lowest possible level is 474 * equal to the highest level among the children. It also checks 475 * for the supported level 476 * UNKNOWN = D0 > D1 > D2 > D3 477 */ 478 static int 479 pwr_level_allowed(pcie_pwr_t *pwr_p) 480 { 481 int *counters = pwr_p->pwr_counters; 482 int i, j; 483 484 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 485 /* 486 * Search from UNKNOWN to D2. unknown is same as D0. 487 * find the highest level among the children. If that 488 * level is supported, return that level. If not, 489 * find the next higher supported level and return that 490 * level. For example, if the D1 is the highest among 491 * children and if D1 isn't supported return D0 as the 492 * lowest possible level. We don't need to look at D3 493 * as that is the default lowest level and it is always 494 * supported. 495 */ 496 for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) { 497 if (counters[i]) { 498 if (i == PCIE_UNKNOWN_INDEX) 499 return (PM_LEVEL_D0); 500 /* 501 * i is the highest level among children. If this is 502 * supported, return i. 503 */ 504 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i)) 505 return (i); 506 /* find the next higher supported level */ 507 for (j = i + 1; j <= PCIE_D0_INDEX; j++) { 508 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j)) 509 return (j); 510 } 511 } 512 } 513 514 return (PM_LEVEL_D3); 515 } 516 517 /* 518 * Update the counters with number pm components of the child 519 * all components are assumed to be at UNKNOWN level. 520 */ 521 static void 522 pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) 523 { 524 int comps = PM_NUMCMPTS(cdip); 525 pcie_pm_t *pcie_pm_p; 526 pcie_pwr_child_t *cpwr_p; 527 528 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 529 if (!comps) 530 return; 531 532 PCIE_DBG("%s(%d): pcie_add_comps: unknown level counter incremented " 533 "from %d by %d because of %s@%d\n", 534 ddi_driver_name(dip), ddi_get_instance(dip), 535 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps, 536 ddi_driver_name(cdip), ddi_get_instance(cdip)); 537 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps; 538 /* 539 * Allocate counters per child. This is a part of pcie 540 * pm info. If there is no pcie pm info, allocate it here. 541 * pcie pm info might already be there for pci express nexus 542 * driver e.g. pcieb. For all leaf nodes, it is allocated here. 543 */ 544 if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) { 545 pcie_pm_p = (pcie_pm_t *)kmem_zalloc( 546 sizeof (pcie_pm_t), KM_SLEEP); 547 PCIE_SET_PMINFO(cdip, pcie_pm_p); 548 } 549 cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t), 550 KM_SLEEP); 551 pcie_pm_p->pcie_par_pminfo = cpwr_p; 552 (cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps; 553 } 554 555 /* 556 * Remove the pm components of a child from our counters. 557 */ 558 static void 559 pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) 560 { 561 int i; 562 int *child_counters; 563 564 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 565 if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) { 566 if (PCIE_SUPPORTS_DEVICE_PM(dip)) { 567 /* 568 * Driver never made a PM call and we didn't create 569 * any counters for this device. This also means that 570 * hold made at the PRE_ATTACH time, still remains. 571 * Remove the hold now. The correct thing to do is to 572 * stay at full power when a child is at full power 573 * whether a driver is there or not. This will be 574 * implemented in the future. 575 */ 576 pcie_pm_subrelease(dip, pwr_p); 577 } 578 return; 579 } 580 PCIE_DBG("%s(%d): pcie_remove_comps:counters decremented because of " 581 "%s@%d\n", ddi_driver_name(dip), ddi_get_instance(dip), 582 ddi_driver_name(cdip), ddi_get_instance(cdip)); 583 child_counters = PCIE_CHILD_COUNTERS(cdip); 584 /* 585 * Adjust the nexus counters. No need to adjust per child dip 586 * counters as we are freeing the per child dip info. 587 */ 588 for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) { 589 ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]); 590 (pwr_p->pwr_counters)[i] -= child_counters[i]; 591 } 592 /* remove both parent pm info and pcie pminfo itself */ 593 kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t)); 594 kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t)); 595 PCIE_RESET_PMINFO(cdip); 596 } 597 598 /* 599 * Power management related initialization common to px and pcieb 600 */ 601 int 602 pwr_common_setup(dev_info_t *dip) 603 { 604 pcie_pm_t *pcie_pm_p; 605 pcie_pwr_t *pwr_p; 606 int pminfo_created = 0; 607 608 /* Create pminfo, if it doesn't exist already */ 609 if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) { 610 pcie_pm_p = (pcie_pm_t *)kmem_zalloc( 611 sizeof (pcie_pm_t), KM_SLEEP); 612 PCIE_SET_PMINFO(dip, pcie_pm_p); 613 pminfo_created = 1; 614 } 615 pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP); 616 mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL); 617 /* Initialize the power level and default level support */ 618 pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN; 619 pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED; 620 621 if (pcie_plat_pwr_setup(dip) != DDI_SUCCESS) 622 goto pwr_common_err; 623 624 pcie_pm_p->pcie_pwr_p = pwr_p; 625 return (DDI_SUCCESS); 626 627 pwr_common_err: 628 mutex_destroy(&pwr_p->pwr_lock); 629 kmem_free(pwr_p, sizeof (pcie_pwr_t)); 630 if (pminfo_created) { 631 PCIE_RESET_PMINFO(dip); 632 kmem_free(pcie_pm_p, sizeof (pcie_pm_t)); 633 } 634 return (DDI_FAILURE); 635 636 } 637 638 /* 639 * Undo whatever is done in pwr_common_setup. Called by px_detach or pxb_detach 640 */ 641 void 642 pwr_common_teardown(dev_info_t *dip) 643 { 644 pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip); 645 pcie_pwr_t *pwr_p; 646 647 if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 648 return; 649 650 pcie_plat_pwr_teardown(dip); 651 mutex_destroy(&pwr_p->pwr_lock); 652 pcie_pm_p->pcie_pwr_p = NULL; 653 kmem_free(pwr_p, sizeof (pcie_pwr_t)); 654 /* 655 * If the parent didn't store have any pm info about 656 * this node, that means parent doesn't need pminfo when it handles 657 * POST_DETACH for this node. For example, if dip is the dip of 658 * root complex, then there is no parent pm info. 659 */ 660 if (!PCIE_PAR_PMINFO(dip)) { 661 kmem_free(pcie_pm_p, sizeof (pcie_pm_t)); 662 PCIE_RESET_PMINFO(dip); 663 } 664 } 665 666 /* 667 * Raises the power and marks itself busy. 668 */ 669 int 670 pcie_pm_hold(dev_info_t *dip) 671 { 672 pcie_pwr_t *pwr_p; 673 674 /* If no PM info or no device PM, return */ 675 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) || 676 !(PCIE_SUPPORTS_DEVICE_PM(dip))) 677 return (DDI_SUCCESS); 678 679 /* 680 * If we are not at full power, then powerup. 681 * Need to be at full power so that link can be 682 * at L0. Similarly for PCI/PCI-X bus, it should be 683 * at full power. 684 */ 685 mutex_enter(&pwr_p->pwr_lock); 686 ASSERT(pwr_p->pwr_hold >= 0); 687 PCIE_DBG("%s(%d): pm_hold: incrementing hold \n", 688 ddi_driver_name(dip), ddi_get_instance(dip)); 689 pwr_p->pwr_hold++; 690 /* Mark itself busy, if it is not done already */ 691 if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) { 692 PCIE_DBG("%s(%d): pm_hold: marking busy\n", 693 ddi_driver_name(dip), ddi_get_instance(dip)); 694 pwr_p->pwr_flags |= PCIE_PM_BUSY; 695 (void) pm_busy_component(dip, 0); 696 } 697 if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) { 698 mutex_exit(&pwr_p->pwr_lock); 699 return (DDI_SUCCESS); 700 } 701 mutex_exit(&pwr_p->pwr_lock); 702 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { 703 PCIE_DBG("%s(%d): pm_hold: attempt to raise power " 704 "from %d to %d failed\n", ddi_driver_name(dip), 705 ddi_get_instance(dip), pwr_p->pwr_func_lvl, 706 PM_LEVEL_D0); 707 pcie_pm_release(dip); 708 return (DDI_FAILURE); 709 } 710 return (DDI_SUCCESS); 711 } 712 713 /* 714 * Reverse the things done in pcie_pm_hold 715 */ 716 void 717 pcie_pm_release(dev_info_t *dip) 718 { 719 pcie_pwr_t *pwr_p; 720 721 /* If no PM info or no device PM, return */ 722 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) || 723 !(PCIE_SUPPORTS_DEVICE_PM(dip))) 724 return; 725 726 mutex_enter(&pwr_p->pwr_lock); 727 pcie_pm_subrelease(dip, pwr_p); 728 mutex_exit(&pwr_p->pwr_lock); 729 } 730 731 static void 732 pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p) 733 { 734 int level; 735 736 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 737 ASSERT(pwr_p->pwr_hold > 0); 738 PCIE_DBG("%s(%d): pm_subrelease: decrementing hold \n", 739 ddi_driver_name(dip), ddi_get_instance(dip)); 740 pwr_p->pwr_hold--; 741 ASSERT(pwr_p->pwr_hold >= 0); 742 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); 743 level = pwr_level_allowed(pwr_p); 744 if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) { 745 PCIE_DBG("%s(%d): pm_subrelease: marking idle \n", 746 ddi_driver_name(dip), ddi_get_instance(dip)); 747 (void) pm_idle_component(dip, 0); 748 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 749 } 750 } 751 752 /* 753 * Called when the child makes the first power management call. 754 * sets up the counters. All the components of the child device are 755 * assumed to be at unknown level. It also releases the power hold 756 * pwr_p - parent's pwr_t 757 * cdip - child's dip 758 */ 759 int 760 pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip) 761 { 762 pcie_pwr_t *pwr_p; 763 764 /* If no PM info, return */ 765 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 766 return (DDI_SUCCESS); 767 768 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 769 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0); 770 pcie_add_comps(dip, cdip, pwr_p); 771 772 /* If no device power management then return */ 773 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) 774 return (DDI_SUCCESS); 775 776 /* 777 * We have informed PM that we are busy at PRE_ATTACH time for 778 * this child. Release the hold and but don't clear the busy bit. 779 * If a device never changes power, hold will not be released 780 * and we stay at full power. 781 */ 782 ASSERT(pwr_p->pwr_hold > 0); 783 PCIE_DBG("%s(%d): pm_add_child: decrementing hold \n", 784 ddi_driver_name(dip), ddi_get_instance(dip)); 785 pwr_p->pwr_hold--; 786 /* 787 * We must have made sure that busy bit 788 * is set when we put the hold 789 */ 790 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); 791 return (DDI_SUCCESS); 792 } 793 794 /* 795 * Adjust the counters when a child detaches 796 * Marks itself idle if the idle conditions are met. 797 * Called at POST_DETACH time 798 */ 799 int 800 pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip) 801 { 802 int *counters; 803 int total; 804 pcie_pwr_t *pwr_p; 805 806 /* If no PM info, return */ 807 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 808 return (DDI_SUCCESS); 809 810 counters = pwr_p->pwr_counters; 811 mutex_enter(&pwr_p->pwr_lock); 812 pcie_remove_comps(dip, cdip, pwr_p); 813 /* If no device power management then return */ 814 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) { 815 mutex_exit(&pwr_p->pwr_lock); 816 return (DDI_SUCCESS); 817 } 818 total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] + 819 counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] + 820 counters[PCIE_D3_INDEX]); 821 /* 822 * Mark idle if either there are no children or our lowest 823 * possible level is less than the current level. Mark idle 824 * only if it is not already done. 825 */ 826 if ((pwr_p->pwr_hold == 0) && 827 (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) { 828 if (pwr_p->pwr_flags & PCIE_PM_BUSY) { 829 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n", 830 ddi_driver_name(dip), ddi_get_instance(dip)); 831 (void) pm_idle_component(dip, 0); 832 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 833 } 834 } 835 mutex_exit(&pwr_p->pwr_lock); 836 return (DDI_SUCCESS); 837 } 838 839 boolean_t 840 pcie_is_pcie(dev_info_t *dip) 841 { 842 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 843 ASSERT(bus_p); 844 return (bus_p->bus_pcie_off != 0); 845 } 846 847 /* 848 * Called by px_attach or pcieb_attach:: DDI_RESUME 849 */ 850 int 851 pcie_pwr_resume(dev_info_t *dip) 852 { 853 dev_info_t *cdip; 854 pcie_pwr_t *pwr_p = NULL; 855 856 #if defined(__i386) || defined(__amd64) 857 if (dip) 858 return (DDI_SUCCESS); 859 #endif /* defined(__i386) || defined(__amd64) */ 860 861 if (PCIE_PMINFO(dip)) 862 pwr_p = PCIE_NEXUS_PMINFO(dip); 863 864 if (pwr_p) { 865 /* Inform the PM framework that dip is at full power */ 866 if (PCIE_SUPPORTS_DEVICE_PM(dip)) { 867 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0); 868 (void) pm_raise_power(dip, 0, 869 pwr_p->pwr_func_lvl); 870 } 871 } 872 873 /* 874 * Code taken from pci driver. 875 * Restore config registers for children that did not save 876 * their own registers. Children pwr states are UNKNOWN after 877 * a resume since it is possible for the PM framework to call 878 * resume without an actual power cycle. (ie if suspend fails). 879 */ 880 for (cdip = ddi_get_child(dip); cdip != NULL; 881 cdip = ddi_get_next_sibling(cdip)) { 882 boolean_t is_pcie; 883 884 /* 885 * Not interested in children who are not already 886 * init'ed. They will be set up by init_child(). 887 */ 888 if (i_ddi_node_state(cdip) < DS_INITIALIZED) { 889 PCIE_DBG("%s(%d): " 890 "DDI_RESUME: skipping %s%d not in CF1\n", 891 ddi_driver_name(dip), ddi_get_instance(dip), 892 ddi_driver_name(cdip), ddi_get_instance(cdip)); 893 continue; 894 } 895 896 /* 897 * Only restore config registers if saved by nexus. 898 */ 899 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 900 "nexus-saved-config-regs") != 1) 901 continue; 902 903 PCIE_DBG("%s(%d): " 904 "DDI_RESUME: nexus restoring %s%d config regs\n", 905 ddi_driver_name(dip), ddi_get_instance(dip), 906 ddi_driver_name(cdip), ddi_get_instance(cdip)); 907 908 /* clear errors left by OBP scrubbing */ 909 pcie_clear_errors(cdip); 910 911 /* PCIe workaround: disable errors during 4K config resore */ 912 is_pcie = pcie_is_pcie(cdip); 913 if (is_pcie) 914 pcie_disable_errors(cdip); 915 (void) pci_restore_config_regs(cdip); 916 if (is_pcie) { 917 pcie_enable_errors(cdip); 918 (void) pcie_enable_ce(cdip); 919 } 920 921 if (ndi_prop_remove(DDI_DEV_T_NONE, cdip, 922 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { 923 PCIE_DBG("%s(%d): %s%d can't remove prop %s", 924 ddi_driver_name(dip), ddi_get_instance(dip), 925 ddi_driver_name(cdip), ddi_get_instance(cdip), 926 "nexus-saved-config-regs"); 927 } 928 } 929 return (DDI_SUCCESS); 930 } 931 932 /* 933 * Called by pcie_detach or pcieb_detach:: DDI_SUSPEND 934 */ 935 int 936 pcie_pwr_suspend(dev_info_t *dip) 937 { 938 dev_info_t *cdip; 939 int i, *counters; /* per nexus counters */ 940 int *child_counters = NULL; /* per child dip counters */ 941 pcie_pwr_t *pwr_p = NULL; 942 943 #if defined(__i386) || defined(__amd64) 944 if (dip) 945 return (DDI_SUCCESS); 946 #endif /* defined(__i386) || defined(__amd64) */ 947 948 if (PCIE_PMINFO(dip)) 949 pwr_p = PCIE_NEXUS_PMINFO(dip); 950 951 /* 952 * Mark all children to be unknown and bring our power level 953 * to full, if required. This is to avoid any panics while 954 * accessing the child's config space. 955 */ 956 if (pwr_p) { 957 mutex_enter(&pwr_p->pwr_lock); 958 if (PCIE_SUPPORTS_DEVICE_PM(dip) && 959 pwr_p->pwr_func_lvl != PM_LEVEL_D0) { 960 mutex_exit(&pwr_p->pwr_lock); 961 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != 962 DDI_SUCCESS) { 963 PCIE_DBG("%s(%d): pwr_suspend: attempt " 964 "to raise power from %d to %d " 965 "failed\n", ddi_driver_name(dip), 966 ddi_get_instance(dip), pwr_p->pwr_func_lvl, 967 PM_LEVEL_D0); 968 return (DDI_FAILURE); 969 } 970 mutex_enter(&pwr_p->pwr_lock); 971 } 972 counters = pwr_p->pwr_counters; 973 /* 974 * Update the nexus counters. At the resume time all 975 * components are considered to be at unknown level. Use the 976 * fact that counters for unknown level are at the end. 977 */ 978 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) { 979 counters[PCIE_UNKNOWN_INDEX] += counters[i]; 980 counters[i] = 0; 981 } 982 mutex_exit(&pwr_p->pwr_lock); 983 } 984 985 /* 986 * Code taken from pci driver. 987 * Save the state of the configuration headers of child 988 * nodes. 989 */ 990 for (cdip = ddi_get_child(dip); cdip != NULL; 991 cdip = ddi_get_next_sibling(cdip)) { 992 boolean_t is_pcie; 993 994 /* 995 * Not interested in children who are not already 996 * init'ed. They will be set up in init_child(). 997 */ 998 if (i_ddi_node_state(cdip) < DS_INITIALIZED) { 999 PCIE_DBG("%s(%d): DDI_SUSPEND: skipping " 1000 "%s%d not in CF1\n", ddi_driver_name(dip), 1001 ddi_get_instance(dip), ddi_driver_name(cdip), 1002 ddi_get_instance(cdip)); 1003 continue; 1004 } 1005 /* 1006 * Update per child dip counters, if any. Counters 1007 * will not exist if the child is not power manageable 1008 * or if its power entry is never invoked. 1009 */ 1010 if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip)) 1011 child_counters = PCIE_CHILD_COUNTERS(cdip); 1012 if (child_counters && pwr_p) { 1013 mutex_enter(&pwr_p->pwr_lock); 1014 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) { 1015 child_counters[PCIE_UNKNOWN_INDEX] += 1016 child_counters[i]; 1017 child_counters[i] = 0; 1018 } 1019 mutex_exit(&pwr_p->pwr_lock); 1020 } 1021 1022 /* 1023 * Only save config registers if not already saved by child. 1024 */ 1025 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1026 SAVED_CONFIG_REGS) == 1) { 1027 continue; 1028 } 1029 1030 /* 1031 * The nexus needs to save config registers. Create a property 1032 * so it knows to restore on resume. 1033 */ 1034 if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip, 1035 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { 1036 PCIE_DBG("%s(%d): %s%d can't update prop %s", 1037 ddi_driver_name(dip), ddi_get_instance(dip), 1038 ddi_driver_name(cdip), ddi_get_instance(cdip), 1039 "nexus-saved-config-regs"); 1040 } 1041 PCIE_DBG("%s(%d): DDI_SUSPEND: saving config space for" 1042 " %s%d\n", ddi_driver_name(dip), ddi_get_instance(dip), 1043 ddi_driver_name(cdip), ddi_get_instance(cdip)); 1044 1045 /* PCIe workaround: disable errors during 4K config save */ 1046 is_pcie = pcie_is_pcie(cdip); 1047 if (is_pcie) 1048 pcie_disable_errors(cdip); 1049 (void) pci_save_config_regs(cdip); 1050 if (is_pcie) { 1051 pcie_enable_errors(cdip); 1052 (void) pcie_enable_ce(cdip); 1053 } 1054 } 1055 return (DDI_SUCCESS); 1056 } 1057 1058 #ifdef DEBUG 1059 /* 1060 * Description of bus_power_op. 1061 */ 1062 typedef struct pcie_buspwr_desc { 1063 pm_bus_power_op_t pwr_op; 1064 char *pwr_desc; 1065 } pcie_buspwr_desc_t; 1066 1067 static pcie_buspwr_desc_t pcie_buspwr_desc[] = { 1068 {BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"}, 1069 {BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"}, 1070 {BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"}, 1071 {BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"}, 1072 {BUS_POWER_HAS_CHANGED, "HAS_CHANGED"}, 1073 {BUS_POWER_NOINVOL, "NOINVOL"}, 1074 {-1, NULL} 1075 }; 1076 1077 /* 1078 * Returns description of the bus_power_op. 1079 */ 1080 static char * 1081 pcie_decode_pwr_op(pm_bus_power_op_t op) 1082 { 1083 pcie_buspwr_desc_t *descp = pcie_buspwr_desc; 1084 1085 for (; descp->pwr_desc; descp++) { 1086 if (op == descp->pwr_op) 1087 return (descp->pwr_desc); 1088 } 1089 return ("UNKNOWN OP"); 1090 } 1091 #endif 1092