/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/sunpm.h>
#include <sys/epm.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/pcie.h>
#include <sys/pcie_impl.h>
#include <sys/promif.h>		/* prom_printf */
#include <sys/pcie_pwr.h>

#if defined(DEBUG)

#define	DBG pcie_pwr_dbg
static void pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...);
static uint_t pcie_pwr_print = 0;

#else /* DEBUG */

#define	DBG 0 &&

#endif /* DEBUG */

/*
 * This file implements the power management functionality for
 * PCI Express switches and PCI Express-to-PCI/PCI-X bridges. All the
 * code in this file is generic and is not specific to a particular chip.
 * The algorithm, which decides when to go to a lower power, is explained
 * below:
 *
 * 1. Initially, when no children are attached, the driver is idle from
 * the PM framework's point of view (PM idle/PM busy).
 *
 * 2. The driver is PM busy if either a reference count called pwr_hold is
 * greater than zero or the driver is already at the lowest possible power
 * level. The lowest possible power level for the driver is equal to the
 * highest power level among its children. The PM busy condition is
 * indicated by the PCIE_PM_BUSY bit. At any point, only one
 * pm_busy_component call is made for a nexus driver instance.
 *
 * 3. The driver is PM idle if pwr_hold is zero and the lowest
 * possible power level is less than the driver's current power level.
 * At any point, only one pm_idle_component call is made for a nexus
 * driver instance.
 *
 * 4. For any event, such as a child attach, it increments pwr_hold and
 * marks itself busy, if it has not already done so. This temporary hold
 * is removed when the event is complete.
 *
 * 5. Any child's power change requires the parent (this driver) to be at
 * full power. So it raises its power and increments pwr_hold. It also
 * marks itself temporarily busy, if it has not already done so. This hold
 * is removed when the child power change is complete.
 *
 * 6. After each child power change, it evaluates what the lowest
 * possible power level is. If the lowest possible power level is less than
 * the current power level and pwr_hold is zero, then it marks itself
 * idle. The lowest power level is equal to or greater than the highest level
 * among the children.
 * It keeps track of the children's power levels by using counters.
 *
 * 7. Any code, e.g. code that accesses the driver's own registers, should
 * place a temporary hold using pcie_pm_hold.
 */

static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new);
static void pwr_update_counters(int *countersp, int olevel, int nlevel);
static int pwr_level_allowed(pcie_pwr_t *pwr_p);
static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip,
    pcie_pwr_t *pwr_p);
static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip,
    pcie_pwr_t *pwr_p);
static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p);
static boolean_t pcie_is_pcie(dev_info_t *dip);
#ifdef DEBUG
static char *pcie_decode_pwr_op(pm_bus_power_op_t op);
#else
#define	pcie_decode_pwr_op
#endif

/*
 * power entry point.
 *
 * This function decides whether the PM request is honorable.
 * If yes, it then does whatever is necessary for the switch or
 * bridge to change its power.
 */
/* ARGSUSED */
int
pcie_power(dev_info_t *dip, int component, int level)
{
	pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
	int *counters = pwr_p->pwr_counters;
	int pmcaps = pwr_p->pwr_pmcaps;
	int ret = DDI_FAILURE;

#if defined(__i386) || defined(__amd64)
	if (dip)
		return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */

	ASSERT(level != PM_LEVEL_UNKNOWN);
	/* PM should not be asking for a level that is unsupported */
	ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 ||
	    (level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) ||
	    (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2)));

	mutex_enter(&pwr_p->pwr_lock);
	DBG(dip, "pcie_power: change from %d to %d\n",
	    pwr_p->pwr_func_lvl, level);
	if (pwr_p->pwr_func_lvl == level) {
		DBG(dip, "pcie_power: already at %d\n", level);
		ret = DDI_SUCCESS;
		goto pcie_pwr_done;
	}

	if (level < pwr_p->pwr_func_lvl) {
		/*
		 * Going to a lower power. Reject this if we are either busy
		 * or there is a hold.
		 */
		if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
			DBG(dip, "pcie_power: rejecting change to %d "
			    "as busy\n", level);
			goto pcie_pwr_done;
		}

		/*
		 * Now we know that we are neither busy nor is there a hold.
		 * At this point none of the children should be at full power.
		 * Reject the request if the level requested is lower than the
		 * lowest level possible.
		 */
		ASSERT(!counters[PCIE_D0_INDEX] &&
		    !counters[PCIE_UNKNOWN_INDEX]);
		if (level < pwr_level_allowed(pwr_p)) {
			DBG(dip, "pcie_power: rejecting level %d as"
			    " %d is the lowest possible\n", level,
			    pwr_level_allowed(pwr_p));
			goto pcie_pwr_done;
		}
	}

	if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) {
		DBG(dip, "pcie_power: attempt to change to %d "
		    "failed\n", level);
		goto pcie_pwr_done;
	}
	pwr_p->pwr_func_lvl = level;
	DBG(dip, "pcie_power: level changed to %d\n", level);
	ret = DDI_SUCCESS;

pcie_pwr_done:
	mutex_exit(&pwr_p->pwr_lock);
	return (ret);
}

/*
 * Called by pcie_power() only. Caller holds the pwr_lock.
 *
 * dip - dev_info pointer
 * pwr_p - pm info for the node.
 * new - new level
 */
static int
pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new)
{
	uint16_t pmcsr;

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	ASSERT(new != pwr_p->pwr_func_lvl);
	pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
	pmcsr &= ~PCI_PMCSR_STATE_MASK;
	switch (new) {
	case PM_LEVEL_D0:
		pmcsr |= PCI_PMCSR_D0;
		break;

	case PM_LEVEL_D1:
		pmcsr |= PCI_PMCSR_D1;
		break;

	case PM_LEVEL_D2:
		pmcsr |= PCI_PMCSR_D2;
		break;

	case PM_LEVEL_D3:
		pmcsr |= PCI_PMCSR_D3HOT;
		break;

	default:
		ASSERT(0);
		break;
	}
	/* Save config space, if going to D3 */
	if (new == PM_LEVEL_D3) {
		DBG(dip, "pcie_pwr_change: saving config space regs\n");
		if (pci_save_config_regs(dip) != DDI_SUCCESS) {
			DBG(dip, "pcie_pwr_change: failed to save "
			    "config space regs\n");
			return (DDI_FAILURE);
		}
	}

	pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr);

	/*
	 * TBD: Taken from the pci_pci driver. Is this required?
	 * No bus transactions should occur without waiting for
	 * the settle time specified in PCI PM spec rev 2.1 sec 5.6.1.
	 * To make things simple, just use the max time specified for
	 * all state transitions.
	 */
	delay(drv_usectohz(PCI_CLK_SETTLE_TIME));

	/*
	 * Restore config space if coming out of D3
	 */
	if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) {
		DBG(dip, "pcie_pwr_change: restoring config space\n");
		if (pci_restore_config_regs(dip) != DDI_SUCCESS) {
			DBG(dip, "pcie_pwr_change: failed to restore "
			    "config space regs\n");
			return (DDI_FAILURE);
		}
	}
	return (DDI_SUCCESS);
}

/*
 * bus_ctlops.bus_power function.
 *
 * This function handles the PRE_ and POST_ change notifications sent by
 * the PM framework for a child's power level change. It marks itself
 * idle or busy based on the children's power levels.
 */
int
pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
	int *counters = pwr_p->pwr_counters;	/* nexus counters */
	int *child_counters;			/* per child dip counters */
	pm_bp_child_pwrchg_t *bpc;
	pm_bp_has_changed_t *bphc;
	dev_info_t *cdip;
	int new_level;
	int old_level;
	int rv = DDI_SUCCESS;
	int level_allowed, comp;

#if defined(__i386) || defined(__amd64)
	if (dip)
		return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */

	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		new_level = bpc->bpc_nlevel;
		old_level = bpc->bpc_olevel;
		comp = bpc->bpc_comp;
		break;

	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		new_level = bphc->bphc_nlevel;
		old_level = bphc->bphc_olevel;
		comp = bphc->bphc_comp;
		break;

	default:
		break;
	}

	ASSERT(pwr_p);
	mutex_enter(&pwr_p->pwr_lock);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip),
		    pcie_decode_pwr_op(op), old_level, new_level);
		/*
		 * If the nexus doesn't want the child to go into
		 * a non-D0 state, mark the child busy. This way the PM
		 * framework will never try to lower the child's power.
		 * In the case of pm_lower_power, marking busy won't help.
		 * So we need to specifically reject the attempt to
		 * go to a non-D0 state.
		 */
		if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) {
			if (!PCIE_IS_COMPS_COUNTED(cdip)) {
				DBG(dip, "pcie_bus_power: marking child "
				    "busy to disable pm\n");
				(void) pm_busy_component(cdip, 0);
			}
			if (new_level < PM_LEVEL_D0 && !comp) {
				DBG(dip, "pcie_bus_power: rejecting "
				    "child's attempt to go to %d\n",
				    new_level);
				rv = DDI_FAILURE;
			}
		}
		mutex_exit(&pwr_p->pwr_lock);
		if (rv == DDI_SUCCESS)
			rv = pcie_pm_hold(dip);
		return (rv);

	case BUS_POWER_HAS_CHANGED:
	case BUS_POWER_POST_NOTIFICATION:
		DBG(dip, "pcie_bus_power: %s@%d op %s %d->%d\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip),
		    pcie_decode_pwr_op(op), old_level, new_level);
		/*
		 * Child device power changed.
		 * If the pm components of this child aren't accounted for,
		 * then add the components to the counters. This can't
		 * be done in the POST_ATTACH ctlop as pm info isn't created
		 * by then, and also because a driver can make a pm call
		 * during attach.
		 */
		if (!PCIE_IS_COMPS_COUNTED(cdip)) {
			(void) pcie_pm_add_child(dip, cdip);
			if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) &&
			    (op == BUS_POWER_HAS_CHANGED)) {
				DBG(dip, "pcie_bus_power: marking child "
				    "busy to disable pm\n");
				(void) pm_busy_component(cdip, 0);
				/*
				 * If the driver has already changed to a
				 * lower power (pm_power_has_changed) on its
				 * own, there is nothing we can do other than
				 * logging a warning message on the console.
				 */
				if (new_level < PM_LEVEL_D0)
					cmn_err(CE_WARN, "!Downstream device "
					    "%s@%d went to non-D0 state: "
					    "possible loss of link\n",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip));
			}
		}

		/*
		 * If it is POST and device PM is supported, release the
		 * hold done in PRE.
		 */
		if (op == BUS_POWER_POST_NOTIFICATION &&
		    PCIE_SUPPORTS_DEVICE_PM(dip)) {
			pcie_pm_subrelease(dip, pwr_p);
		}

		if (*((int *)result) == DDI_FAILURE) {
			DBG(dip, "pcie_bus_power: change for %s%d failed\n",
			    ddi_driver_name(cdip), ddi_get_instance(cdip));
			break;
		}
		/* Modify counters appropriately */
		pwr_update_counters(counters, old_level, new_level);

		child_counters = PCIE_CHILD_COUNTERS(cdip);
		pwr_update_counters(child_counters, old_level, new_level);

		/* If no device PM, return */
		if (!PCIE_SUPPORTS_DEVICE_PM(dip))
			break;

		level_allowed = pwr_level_allowed(pwr_p);
		/*
		 * Check the conditions for marking busy.
		 * Check the flag so that we mark ourselves busy only once
		 * for multiple busy conditions. Mark busy if our current
		 * lowest possible level is equal to or greater than the
		 * current level.
		 */
		if (level_allowed >= pwr_p->pwr_func_lvl &&
		    !(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
			DBG(dip, "pcie_bus_power: marking busy\n");
			(void) pm_busy_component(dip, 0);
			pwr_p->pwr_flags |= PCIE_PM_BUSY;
			break;
		}
		/*
		 * Check the conditions for marking idle.
		 * If our lowest possible level is less than our current
		 * level, mark idle. Mark idle only if it is not already done.
		 */
		if ((level_allowed < pwr_p->pwr_func_lvl) &&
		    (pwr_p->pwr_hold == 0) &&
		    (pwr_p->pwr_flags & PCIE_PM_BUSY)) {
			/*
			 * For PCI Express, we should check here whether
			 * the link is in the L1 state or not.
			 */
			DBG(dip, "pcie_bus_power: marking idle\n");
			(void) pm_idle_component(dip, 0);
			pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
			break;
		}
		break;

	default:
		mutex_exit(&pwr_p->pwr_lock);
		return (pm_busop_bus_power(dip, impl_arg, op, arg, result));
	}
	mutex_exit(&pwr_p->pwr_lock);
	return (rv);
}

/*
 * Decrement the count of children at olevel by one and increment
 * the count of children at nlevel by one.
 */
static void
pwr_update_counters(int *countersp, int olevel, int nlevel)
{
	uint32_t index;

	ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0);
	ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0);

	index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel);
	countersp[index]--;
	index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel);
	countersp[index]++;
}

/*
 * Returns the lowest possible power level allowed for the nexus,
 * based on the children's power levels. The lowest possible level is
 * equal to the highest level among the children. It also checks
 * for the supported levels:
 * UNKNOWN = D0 > D1 > D2 > D3
 */
static int
pwr_level_allowed(pcie_pwr_t *pwr_p)
{
	int *counters = pwr_p->pwr_counters;
	int i, j;

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	/*
	 * Search from UNKNOWN to D2. UNKNOWN is the same as D0.
	 * Find the highest level among the children. If that
	 * level is supported, return that level. If not,
	 * find the next higher supported level and return that
	 * level. For example, if D1 is the highest among the
	 * children and D1 isn't supported, return D0 as the
	 * lowest possible level. We don't need to look at D3
	 * as that is the default lowest level and it is always
	 * supported.
	 */
	for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) {
		if (counters[i]) {
			if (i == PCIE_UNKNOWN_INDEX)
				return (PM_LEVEL_D0);
			/*
			 * i is the highest level among the children. If this
			 * is supported, return i.
			 */
			if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i))
				return (i);
			/* find the next higher supported level */
			for (j = i + 1; j <= PCIE_D0_INDEX; j++) {
				if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j))
					return (j);
			}
		}
	}

	return (PM_LEVEL_D3);
}

/*
 * Update the counters with the number of pm components of the child.
 * All components are assumed to be at the UNKNOWN level.
 */
static void
pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
{
	int comps = PM_NUMCMPTS(cdip);
	pcie_pm_t *pcie_pm_p;
	pcie_pwr_child_t *cpwr_p;

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	if (!comps)
		return;

	DBG(dip, "pcie_add_comps: unknown level counter incremented "
	    "from %d by %d because of %s@%d\n",
	    (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps,
	    ddi_driver_name(cdip), ddi_get_instance(cdip));
	(pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps;
	/*
	 * Allocate counters per child. This is a part of the pcie
	 * pm info. If there is no pcie pm info, allocate it here.
	 * pcie pm info might already be there for a pci express nexus
	 * driver, e.g. pcieb. For all leaf nodes, it is allocated here.
	 */
	if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) {
		pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
		    sizeof (pcie_pm_t), KM_SLEEP);
		PCIE_SET_PMINFO(cdip, pcie_pm_p);
	}
	cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t),
	    KM_SLEEP);
	pcie_pm_p->pcie_par_pminfo = cpwr_p;
	(cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps;
}

/*
 * Remove the pm components of a child from our counters.
 */
static void
pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
{
	int i;
	int *child_counters;

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) {
		if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
			/*
			 * The driver never made a PM call and we didn't
			 * create any counters for this device. This also
			 * means that the hold made at PRE_ATTACH time still
			 * remains. Remove the hold now. The correct thing to
			 * do is to stay at full power when a child is at
			 * full power, whether a driver is there or not. This
			 * will be implemented in the future.
			 */
			pcie_pm_subrelease(dip, pwr_p);
		}
		return;
	}
	DBG(dip, "pcie_remove_comps: counters decremented because of "
	    "%s@%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip));
	child_counters = PCIE_CHILD_COUNTERS(cdip);
	/*
	 * Adjust the nexus counters. No need to adjust the per child dip
	 * counters as we are freeing the per child dip info.
	 */
	for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) {
		ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]);
		(pwr_p->pwr_counters)[i] -= child_counters[i];
	}
	/* remove both the parent pm info and the pcie pminfo itself */
	kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t));
	kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t));
	PCIE_RESET_PMINFO(cdip);
}

/*
 * Power management related initialization common to px and pcieb
 */
int
pwr_common_setup(dev_info_t *dip)
{
	pcie_pm_t *pcie_pm_p;
	pcie_pwr_t *pwr_p;
	int pminfo_created = 0;

	/* Create pminfo, if it doesn't exist already */
	if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) {
		pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
		    sizeof (pcie_pm_t), KM_SLEEP);
		PCIE_SET_PMINFO(dip, pcie_pm_p);
		pminfo_created = 1;
	}
	pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP);
	mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL);
	/* Initialize the power level and the default level support */
	pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN;
	pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED;

	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) {
		DBG(dip, "can't create pm-want-child-notification\n");
		goto pwr_common_err;
	}
	pcie_pm_p->pcie_pwr_p = pwr_p;

	return (DDI_SUCCESS);

pwr_common_err:
	mutex_destroy(&pwr_p->pwr_lock);
	kmem_free(pwr_p, sizeof (pcie_pwr_t));
	if (pminfo_created) {
		PCIE_RESET_PMINFO(dip);
		kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
	}
	return (DDI_FAILURE);
}

/*
 * Undo whatever is done in pwr_common_setup.
 * Called by px_detach or pcieb_detach.
 */
void
pwr_common_teardown(dev_info_t *dip)
{
	pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip);
	pcie_pwr_t *pwr_p;

	if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
		return;

	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip,
	    "pm-want-child-notification?");
	mutex_destroy(&pwr_p->pwr_lock);
	pcie_pm_p->pcie_pwr_p = NULL;
	kmem_free(pwr_p, sizeof (pcie_pwr_t));
	/*
	 * If the parent didn't store any pm info about
	 * this node, that means the parent doesn't need pminfo when it
	 * handles POST_DETACH for this node. For example, if dip is the dip
	 * of the root complex, then there is no parent pm info.
	 */
	if (!PCIE_PAR_PMINFO(dip)) {
		kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
		PCIE_RESET_PMINFO(dip);
	}
}

/*
 * Raises the power and marks itself busy.
 */
int
pcie_pm_hold(dev_info_t *dip)
{
	pcie_pwr_t *pwr_p;

	/* If no PM info or no device PM, return */
	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
	    !(PCIE_SUPPORTS_DEVICE_PM(dip)))
		return (DDI_SUCCESS);

	/*
	 * If we are not at full power, then power up.
	 * We need to be at full power so that the link can be
	 * at L0. Similarly, a PCI/PCI-X bus should be
	 * at full power.
	 */
	mutex_enter(&pwr_p->pwr_lock);
	ASSERT(pwr_p->pwr_hold >= 0);
	DBG(dip, "pm_hold: incrementing hold\n");
	pwr_p->pwr_hold++;
	/* Mark itself busy, if it is not done already */
	if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
		DBG(dip, "pm_hold: marking busy\n");
		pwr_p->pwr_flags |= PCIE_PM_BUSY;
		(void) pm_busy_component(dip, 0);
	}
	if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) {
		mutex_exit(&pwr_p->pwr_lock);
		return (DDI_SUCCESS);
	}
	mutex_exit(&pwr_p->pwr_lock);
	if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) {
		DBG(dip, "pm_hold: attempt to raise power "
		    "from %d to %d failed\n", pwr_p->pwr_func_lvl,
		    PM_LEVEL_D0);
		pcie_pm_release(dip);
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

/*
 * Reverse the things done in pcie_pm_hold
 */
void
pcie_pm_release(dev_info_t *dip)
{
	pcie_pwr_t *pwr_p;

	/* If no PM info or no device PM, return */
	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
	    !(PCIE_SUPPORTS_DEVICE_PM(dip)))
		return;

	mutex_enter(&pwr_p->pwr_lock);
	pcie_pm_subrelease(dip, pwr_p);
	mutex_exit(&pwr_p->pwr_lock);
}

static void
pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p)
{
	int level;

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	ASSERT(pwr_p->pwr_hold > 0);
	DBG(dip, "pm_subrelease: decrementing hold\n");
	pwr_p->pwr_hold--;
	ASSERT(pwr_p->pwr_hold >= 0);
	ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
	level = pwr_level_allowed(pwr_p);
	if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) {
		DBG(dip, "pm_subrelease: marking idle\n");
		(void) pm_idle_component(dip, 0);
		pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
	}
}

/*
 * Called when the child makes its first power management call.
 * Sets up the counters. All the components of the child device are
 * assumed to be at the unknown level.
 * It also releases the power hold.
 *
 * dip  - parent's dip
 * cdip - child's dip
 */
int
pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip)
{
	pcie_pwr_t *pwr_p;

	/* If no PM info, return */
	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
		return (DDI_SUCCESS);

	ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
	ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
	pcie_add_comps(dip, cdip, pwr_p);

	/* If no device power management then return */
	if (!PCIE_SUPPORTS_DEVICE_PM(dip))
		return (DDI_SUCCESS);

	/*
	 * We have informed PM that we are busy at PRE_ATTACH time for
	 * this child. Release the hold but don't clear the busy bit.
	 * If a device never changes power, the hold will not be released
	 * and we stay at full power.
	 */
	ASSERT(pwr_p->pwr_hold > 0);
	DBG(dip, "pm_add_child: decrementing hold\n");
	pwr_p->pwr_hold--;
	/*
	 * We must have made sure that the busy bit
	 * is set when we put the hold
	 */
	ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
	return (DDI_SUCCESS);
}

/*
 * Adjust the counters when a child detaches.
 * Marks itself idle if the idle conditions are met.
 * Called at POST_DETACH time.
 */
int
pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip)
{
	int *counters;
	int total;
	pcie_pwr_t *pwr_p;

	/* If no PM info, return */
	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
		return (DDI_SUCCESS);

	counters = pwr_p->pwr_counters;
	mutex_enter(&pwr_p->pwr_lock);
	pcie_remove_comps(dip, cdip, pwr_p);
	/* If no device power management then return */
	if (!PCIE_SUPPORTS_DEVICE_PM(dip)) {
		mutex_exit(&pwr_p->pwr_lock);
		return (DDI_SUCCESS);
	}
	total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] +
	    counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] +
	    counters[PCIE_D3_INDEX]);
	/*
	 * Mark idle if either there are no children or our lowest
	 * possible level is less than the current level. Mark idle
	 * only if it is not already done.
	 */
	if ((pwr_p->pwr_hold == 0) &&
	    (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) {
		if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
			DBG(dip, "pcie_pm_remove_child: marking idle\n");
			(void) pm_idle_component(dip, 0);
			pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
		}
	}
	mutex_exit(&pwr_p->pwr_lock);
	return (DDI_SUCCESS);
}

static boolean_t
pcie_is_pcie(dev_info_t *dip)
{
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	ASSERT(bus_p);
	return (bus_p->bus_pcie_off != 0);
}

/*
 * Called by px_attach or pcieb_attach:: DDI_RESUME
 */
int
pcie_pwr_resume(dev_info_t *dip)
{
	dev_info_t *cdip;
	pcie_pwr_t *pwr_p = NULL;

#if defined(__i386) || defined(__amd64)
	if (dip)
		return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */

	if (PCIE_PMINFO(dip))
		pwr_p = PCIE_NEXUS_PMINFO(dip);

	if (pwr_p) {
		/* Inform the PM framework that dip is at full power */
		if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
			ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
			(void) pm_raise_power(dip, 0,
			    pwr_p->pwr_func_lvl);
		}
	}

	/*
	 * Code taken from the pci driver.
	 * Restore config registers for children that did not save
	 * their own registers. Children's pwr states are UNKNOWN after
	 * a resume since it is possible for the PM framework to call
	 * resume without an actual power cycle (i.e. if suspend fails).
	 */
	for (cdip = ddi_get_child(dip); cdip != NULL;
	    cdip = ddi_get_next_sibling(cdip)) {
		boolean_t is_pcie;

		/*
		 * Not interested in children who are not already
		 * init'ed. They will be set up by init_child().
		 */
		if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
			DBG(dip,
			    "DDI_RESUME: skipping %s%d not in CF1\n",
			    ddi_driver_name(cdip), ddi_get_instance(cdip));
			continue;
		}

		/*
		 * Only restore config registers if saved by the nexus.
		 */
		if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
		    "nexus-saved-config-regs") != 1)
			continue;

		DBG(dip,
		    "DDI_RESUME: nexus restoring %s%d config regs\n",
		    ddi_driver_name(cdip), ddi_get_instance(cdip));

		/* clear errors left by OBP scrubbing */
		pcie_clear_errors(cdip);

		/* PCIe workaround: disable errors during 4K config restore */
		if (is_pcie = pcie_is_pcie(cdip))
			pcie_disable_errors(cdip);
		(void) pci_restore_config_regs(cdip);
		if (is_pcie) {
			pcie_enable_errors(cdip);
			(void) pcie_enable_ce(cdip);
		}

		if (ndi_prop_remove(DDI_DEV_T_NONE, cdip,
		    "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
			DBG(dip, "%s%d can't remove prop %s",
			    ddi_driver_name(cdip), ddi_get_instance(cdip),
			    "nexus-saved-config-regs");
		}
	}
	return (DDI_SUCCESS);
}

/*
 * Called by px_detach or pcieb_detach:: DDI_SUSPEND
 */
int
pcie_pwr_suspend(dev_info_t *dip)
{
	dev_info_t *cdip;
	int i, *counters;		/* per nexus counters */
	int *child_counters = NULL;	/* per child dip counters */
	pcie_pwr_t *pwr_p = NULL;

#if defined(__i386) || defined(__amd64)
	if (dip)
		return (DDI_SUCCESS);
#endif /* defined(__i386) || defined(__amd64) */

	if (PCIE_PMINFO(dip))
		pwr_p = PCIE_NEXUS_PMINFO(dip);

	/*
	 * Mark all children to be unknown and bring our power level
	 * to full, if required. This is to avoid any panics while
	 * accessing the child's config space.
	 */
	if (pwr_p) {
		mutex_enter(&pwr_p->pwr_lock);
		if (PCIE_SUPPORTS_DEVICE_PM(dip) &&
		    pwr_p->pwr_func_lvl != PM_LEVEL_D0) {
			mutex_exit(&pwr_p->pwr_lock);
			if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
			    DDI_SUCCESS) {
				DBG(dip, "pwr_suspend: attempt "
				    "to raise power from %d to %d "
				    "failed\n", pwr_p->pwr_func_lvl,
				    PM_LEVEL_D0);
				return (DDI_FAILURE);
			}
			mutex_enter(&pwr_p->pwr_lock);
		}
		counters = pwr_p->pwr_counters;
		/*
		 * Update the nexus counters. At resume time all
		 * components are considered to be at the unknown level.
		 * Use the fact that the counters for the unknown level are
		 * at the end.
		 */
		for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
			counters[PCIE_UNKNOWN_INDEX] += counters[i];
			counters[i] = 0;
		}
		mutex_exit(&pwr_p->pwr_lock);
	}

	/*
	 * Code taken from the pci driver.
	 * Save the state of the configuration headers of the child
	 * nodes.
	 */
	for (cdip = ddi_get_child(dip); cdip != NULL;
	    cdip = ddi_get_next_sibling(cdip)) {
		boolean_t is_pcie;

		/*
		 * Not interested in children who are not already
		 * init'ed. They will be set up in init_child().
		 */
		if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
			DBG(dip, "DDI_SUSPEND: skipping "
			    "%s%d not in CF1\n", ddi_driver_name(cdip),
			    ddi_get_instance(cdip));
			continue;
		}
		/*
		 * Update the per child dip counters, if any.
		 * Counters will not exist if the child is not power
		 * manageable or if its power entry point is never invoked.
		 */
		if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip))
			child_counters = PCIE_CHILD_COUNTERS(cdip);
		if (child_counters && pwr_p) {
			mutex_enter(&pwr_p->pwr_lock);
			for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
				child_counters[PCIE_UNKNOWN_INDEX] +=
				    child_counters[i];
				child_counters[i] = 0;
			}
			mutex_exit(&pwr_p->pwr_lock);
		}

		/*
		 * Only save config registers if not already saved by the
		 * child.
		 */
		if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
		    SAVED_CONFIG_REGS) == 1) {
			continue;
		}

		/*
		 * The nexus needs to save config registers. Create a property
		 * so it knows to restore on resume.
		 */
		if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip,
		    "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
			DBG(dip, "%s%d can't update prop %s",
			    ddi_driver_name(cdip), ddi_get_instance(cdip),
			    "nexus-saved-config-regs");
		}
		DBG(dip, "DDI_SUSPEND: saving config space for"
		    " %s%d\n", ddi_driver_name(cdip), ddi_get_instance(cdip));

		/* PCIe workaround: disable errors during 4K config save */
		if (is_pcie = pcie_is_pcie(cdip))
			pcie_disable_errors(cdip);
		(void) pci_save_config_regs(cdip);
		if (is_pcie) {
			pcie_enable_errors(cdip);
			(void) pcie_enable_ce(cdip);
		}
	}
	return (DDI_SUCCESS);
}

#ifdef DEBUG
/*
 * Description of bus_power_op.
 */
typedef struct pcie_buspwr_desc {
	pm_bus_power_op_t pwr_op;
	char *pwr_desc;
} pcie_buspwr_desc_t;

static pcie_buspwr_desc_t pcie_buspwr_desc[] = {
	{BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"},
	{BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"},
	{BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"},
	{BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"},
	{BUS_POWER_HAS_CHANGED, "HAS_CHANGED"},
	{BUS_POWER_NOINVOL, "NOINVOL"},
	{-1, NULL}
};

/*
 * Returns a description of the bus_power_op.
 */
static char *
pcie_decode_pwr_op(pm_bus_power_op_t op)
{
	pcie_buspwr_desc_t *descp = pcie_buspwr_desc;

	for (; descp->pwr_desc; descp++) {
		if (op == descp->pwr_op)
			return (descp->pwr_desc);
	}
	return ("UNKNOWN OP");
}

static void
pcie_pwr_dbg(dev_info_t *dip, char *fmt, ...)
{
	va_list ap;

	if (!pcie_pwr_print)
		return;

	if (dip)
		prom_printf("%s(%d): pcie pwr: ", ddi_driver_name(dip),
		    ddi_get_instance(dip));

	va_start(ap, fmt);
	prom_vprintf(fmt, ap);
	va_end(ap);
}

#endif /* DEBUG */