1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/kmem.h> 29 #include <sys/sysmacros.h> 30 #include <sys/sunddi.h> 31 #include <sys/sunpm.h> 32 #include <sys/epm.h> 33 #include <sys/sunndi.h> 34 #include <sys/ddi_impldefs.h> 35 #include <sys/ddi_implfuncs.h> 36 #include <sys/pcie.h> 37 #include <sys/pcie_impl.h> 38 #include <sys/promif.h> /* prom_printf */ 39 #include <sys/pcie_pwr.h> 40 41 /* 42 * This file implements the power management functionality for 43 * pci express switch and pci express-to-pci/pci-x bridge. All the 44 * code in this file is generic and is not specific to a particular chip. 45 * The algorithm, which decides when to go to a lower power is explained 46 * below: 47 * 48 * 1. Initially when no children are attached, the driver is idle from 49 * PM framework point of view ( PM idle/PM busy). 50 * 51 * 2. Driver is PM busy if either a reference count called pwr_hold is 52 * greater than zero or driver is already at the lowest possible power 53 * level. 
 * The lowest possible power level for the driver is equal to the
 * highest power level among its children. The PM busy condition is
 * indicated by the PCIE_PM_BUSY bit. At any point, only one
 * pm_busy_component call is made for a nexus driver instance.
 *
 * 3. Driver is PM idle if the pwr_hold is zero and the lowest
 * possible power level is less than the driver's current power level.
 * At any point, only one pm_idle_component call is made for a nexus
 * driver instance.
 *
 * 4. For any event like child attach, it increments pwr_hold and marks
 * itself busy, if it has not already done so. This temporary hold is
 * removed when the event is complete.
 *
 * 5. Any child's power change requires the parent (this driver) to be
 * at full power. So it raises its power and increments pwr_hold. It also
 * marks itself temporarily busy, if it is not already done. This hold
 * is removed when the child power change is complete.
 *
 * 6. After each child power change, it evaluates what is the lowest
 * possible power level. If the lowest possible power level is less than
 * the current power level and pwr_hold is zero, then it marks itself
 * idle. The lowest power level is equal to or greater than the highest
 * level among the children. It keeps track of children's power level by
 * using counters.
 *
 * 7. Any code which accesses the driver's own registers, for example,
 * should place a temporary hold using pcie_pm_hold.
81 */ 82 83 static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new); 84 static void pwr_update_counters(int *countersp, int olevel, int nlevel); 85 static int pwr_level_allowed(pcie_pwr_t *pwr_p); 86 static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, 87 pcie_pwr_t *pwr_p); 88 static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, 89 pcie_pwr_t *pwr_p); 90 static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p); 91 static boolean_t pcie_is_pcie(dev_info_t *dip); 92 #ifdef DEBUG 93 static char *pcie_decode_pwr_op(pm_bus_power_op_t op); 94 #else 95 #define pcie_decode_pwr_op 96 #endif 97 98 /* 99 * power entry point. 100 * 101 * This function decides whether the PM request is honorable. 102 * If yes, it then does what's necessary for switch or 103 * bridge to change its power. 104 */ 105 /* ARGSUSED */ 106 int 107 pcie_power(dev_info_t *dip, int component, int level) 108 { 109 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip); 110 int *counters = pwr_p->pwr_counters; 111 int pmcaps = pwr_p->pwr_pmcaps; 112 int ret = DDI_FAILURE; 113 114 #if defined(__i386) || defined(__amd64) 115 if (dip) 116 return (DDI_SUCCESS); 117 #endif /* defined(__i386) || defined(__amd64) */ 118 119 ASSERT(level != PM_LEVEL_UNKNOWN); 120 /* PM should not asking for a level, which is unsupported */ 121 ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 || 122 (level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) || 123 (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2))); 124 125 mutex_enter(&pwr_p->pwr_lock); 126 PCIE_DBG("%s(%d): pcie_power: change from %d to %d\n", 127 ddi_driver_name(dip), ddi_get_instance(dip), pwr_p->pwr_func_lvl, 128 level); 129 if (pwr_p->pwr_func_lvl == level) { 130 PCIE_DBG("%s(%d): pcie_power: already at %d\n", 131 ddi_driver_name(dip), ddi_get_instance(dip), level); 132 ret = DDI_SUCCESS; 133 goto pcie_pwr_done; 134 } 135 136 if (level < pwr_p->pwr_func_lvl) { 137 /* 138 * Going to lower power. 
Reject this if we are either busy 139 * or there is a hold. 140 */ 141 if (pwr_p->pwr_flags & PCIE_PM_BUSY) { 142 PCIE_DBG("%s(%d): pcie_power: rejecting change to %d " 143 "as busy\n", ddi_driver_name(dip), 144 ddi_get_instance(dip), level); 145 goto pcie_pwr_done; 146 } 147 148 /* 149 * Now we know that we are neither busy nor there is a hold. 150 * At this point none of the children should be at full power. 151 * Reject the request if level reqested is lower than the level 152 * possible. 153 */ 154 ASSERT(!counters[PCIE_D0_INDEX] && 155 !counters[PCIE_UNKNOWN_INDEX]); 156 if (level < pwr_level_allowed(pwr_p)) { 157 PCIE_DBG("%s(%d): pcie_power: rejecting level %d as" 158 " %d is the lowest possible\n", 159 ddi_driver_name(dip), ddi_get_instance(dip), level, 160 pwr_level_allowed(pwr_p)); 161 goto pcie_pwr_done; 162 } 163 } 164 165 if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) { 166 PCIE_DBG("%s(%d): pcie_power: attempt to change to %d " 167 " failed \n", ddi_driver_name(dip), ddi_get_instance(dip), 168 level); 169 goto pcie_pwr_done; 170 } 171 pwr_p->pwr_func_lvl = level; 172 PCIE_DBG("%s(%d): pcie_power: level changed to %d \n", 173 ddi_driver_name(dip), ddi_get_instance(dip), level); 174 ret = DDI_SUCCESS; 175 176 pcie_pwr_done: 177 mutex_exit(&pwr_p->pwr_lock); 178 return (ret); 179 } 180 181 /* 182 * Called by pcie_power() only. Caller holds the pwr_lock. 183 * 184 * dip - dev_info pointer 185 * pwr_p - pm info for the node. 
186 * new - new level 187 */ 188 static int 189 pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new) 190 { 191 uint16_t pmcsr; 192 193 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 194 ASSERT(new != pwr_p->pwr_func_lvl); 195 pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset); 196 pmcsr &= ~PCI_PMCSR_STATE_MASK; 197 switch (new) { 198 case PM_LEVEL_D0: 199 pmcsr |= PCI_PMCSR_D0; 200 break; 201 202 case PM_LEVEL_D1: 203 pmcsr |= PCI_PMCSR_D1; 204 break; 205 206 case PM_LEVEL_D2: 207 pmcsr |= PCI_PMCSR_D2; 208 break; 209 210 case PM_LEVEL_D3: 211 pmcsr |= PCI_PMCSR_D3HOT; 212 break; 213 214 default: 215 ASSERT(0); 216 break; 217 } 218 /* Save config space, if going to D3 */ 219 if (new == PM_LEVEL_D3) { 220 PCIE_DBG("%s(%d): pwr_change: saving config space regs\n", 221 ddi_driver_name(dip), ddi_get_instance(dip)); 222 if (pci_save_config_regs(dip) != DDI_SUCCESS) { 223 PCIE_DBG("%s(%d): pcie_pwr_change: failed to save " 224 "config space regs\n", ddi_driver_name(dip), 225 ddi_get_instance(dip)); 226 return (DDI_FAILURE); 227 } 228 } 229 230 pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr); 231 232 /* 233 * TBD: Taken from pci_pci driver. Is this required? 234 * No bus transactions should occur without waiting for 235 * settle time specified in PCI PM spec rev 2.1 sec 5.6.1 236 * To make things simple, just use the max time specified for 237 * all state transitions. 
238 */ 239 delay(drv_usectohz(PCI_CLK_SETTLE_TIME)); 240 241 /* 242 * Restore config space if coming out of D3 243 */ 244 if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) { 245 PCIE_DBG("%s(%d): pcie_pwr_change: restoring config space\n", 246 ddi_driver_name(dip), ddi_get_instance(dip)); 247 if (pci_restore_config_regs(dip) != DDI_SUCCESS) { 248 PCIE_DBG("%s(%d): pcie_pwr_change: failed to restore " 249 "config space regs\n", ddi_driver_name(dip), 250 ddi_get_instance(dip)); 251 return (DDI_FAILURE); 252 } 253 } 254 return (DDI_SUCCESS); 255 } 256 257 /* 258 * bus_ctlops.bus_power function. 259 * 260 * This function handles PRE_ POST_ change notifications, sent by 261 * PM framework related to child's power level change. It marks itself 262 * idle or busy based on the children's power level. 263 */ 264 int 265 pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, 266 void *arg, void *result) 267 { 268 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip); 269 int *counters = pwr_p->pwr_counters; /* nexus counters */ 270 int *child_counters; /* per child dip counters */ 271 pm_bp_child_pwrchg_t *bpc; 272 pm_bp_has_changed_t *bphc; 273 dev_info_t *cdip; 274 int new_level; 275 int old_level; 276 int rv = DDI_SUCCESS; 277 int level_allowed, comp; 278 279 #if defined(__i386) || defined(__amd64) 280 if (dip) 281 return (DDI_SUCCESS); 282 #endif /* defined(__i386) || defined(__amd64) */ 283 284 switch (op) { 285 case BUS_POWER_PRE_NOTIFICATION: 286 case BUS_POWER_POST_NOTIFICATION: 287 bpc = (pm_bp_child_pwrchg_t *)arg; 288 cdip = bpc->bpc_dip; 289 new_level = bpc->bpc_nlevel; 290 old_level = bpc->bpc_olevel; 291 comp = bpc->bpc_comp; 292 break; 293 294 case BUS_POWER_HAS_CHANGED: 295 bphc = (pm_bp_has_changed_t *)arg; 296 cdip = bphc->bphc_dip; 297 new_level = bphc->bphc_nlevel; 298 old_level = bphc->bphc_olevel; 299 comp = bphc->bphc_comp; 300 break; 301 302 default: 303 break; 304 305 } 306 307 ASSERT(pwr_p); 308 mutex_enter(&pwr_p->pwr_lock); 309 switch (op) { 310 case 
BUS_POWER_PRE_NOTIFICATION: 311 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n", 312 ddi_driver_name(dip), ddi_get_instance(dip), 313 ddi_driver_name(cdip), ddi_get_instance(cdip), 314 pcie_decode_pwr_op(op), old_level, new_level); 315 /* 316 * If the nexus doesn't want the child to go into 317 * non-D0 state, mark the child busy. This way PM 318 * framework will never try to lower the child's power. 319 * In case of pm_lower_power, marking busy won't help. 320 * So we need to specifically reject the attempt to 321 * go to non-D0 state. 322 */ 323 if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) { 324 if (!PCIE_IS_COMPS_COUNTED(cdip)) { 325 PCIE_DBG("%s(%d): pcie_bus_power: marking " 326 "child busy to disable pm \n", 327 ddi_driver_name(dip), 328 ddi_get_instance(dip)); 329 (void) pm_busy_component(cdip, 0); 330 } 331 if (new_level < PM_LEVEL_D0 && !comp) { 332 PCIE_DBG("%s(%d): pcie_bus_power: rejecting " 333 "child's attempt to go to %d\n", 334 ddi_driver_name(dip), ddi_get_instance(dip), 335 new_level); 336 rv = DDI_FAILURE; 337 } 338 } 339 mutex_exit(&pwr_p->pwr_lock); 340 if (rv == DDI_SUCCESS) 341 rv = pcie_pm_hold(dip); 342 return (rv); 343 344 case BUS_POWER_HAS_CHANGED: 345 case BUS_POWER_POST_NOTIFICATION: 346 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n", 347 ddi_driver_name(dip), ddi_get_instance(dip), 348 ddi_driver_name(cdip), ddi_get_instance(cdip), 349 pcie_decode_pwr_op(op), old_level, new_level); 350 /* 351 * Child device power changed 352 * If pm components of this child aren't accounted for 353 * then add the components to the counters. This can't 354 * be done in POST_ATTACH ctlop as pm info isn't created 355 * by then. Also because a driver can make a pm call during 356 * the attach. 
357 */ 358 if (!PCIE_IS_COMPS_COUNTED(cdip)) { 359 (void) pcie_pm_add_child(dip, cdip); 360 if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) && 361 (op == BUS_POWER_HAS_CHANGED)) { 362 PCIE_DBG("%s(%d): pcie_bus_power: marking " 363 "child busy to disable pm \n", 364 ddi_driver_name(dip), 365 ddi_get_instance(dip)); 366 (void) pm_busy_component(cdip, 0); 367 /* 368 * If the driver has already changed to lower 369 * power(pm_power_has_changed) on its own, 370 * there is nothing we can do other than 371 * logging the warning message on the console. 372 */ 373 if (new_level < PM_LEVEL_D0) 374 cmn_err(CE_WARN, "!Downstream device " 375 "%s@%d went to non-D0 state: " 376 "possible loss of link\n", 377 ddi_driver_name(cdip), 378 ddi_get_instance(cdip)); 379 } 380 } 381 382 383 /* 384 * If it is POST and device PM is supported, release the 385 * hold done in PRE. 386 */ 387 if (op == BUS_POWER_POST_NOTIFICATION && 388 PCIE_SUPPORTS_DEVICE_PM(dip)) { 389 pcie_pm_subrelease(dip, pwr_p); 390 } 391 392 if (*((int *)result) == DDI_FAILURE) { 393 PCIE_DBG("%s(%d): pcie_bus_power: change for %s%d " 394 "failed\n", ddi_driver_name(dip), 395 ddi_get_instance(dip), ddi_driver_name(cdip), 396 ddi_get_instance(cdip)); 397 break; 398 } 399 /* Modify counters appropriately */ 400 pwr_update_counters(counters, old_level, new_level); 401 402 child_counters = PCIE_CHILD_COUNTERS(cdip); 403 pwr_update_counters(child_counters, old_level, new_level); 404 405 /* If no device PM, return */ 406 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) 407 break; 408 409 level_allowed = pwr_level_allowed(pwr_p); 410 /* 411 * Check conditions for marking busy 412 * Check the flag to set this busy only once for multiple 413 * busy conditions. Mark busy if our current lowest possible 414 * is equal or greater to the current level. 
415 */ 416 if (level_allowed >= pwr_p->pwr_func_lvl && 417 !(pwr_p->pwr_flags & PCIE_PM_BUSY)) { 418 PCIE_DBG("%s(%d): pcie_bus_power: marking busy\n", 419 ddi_driver_name(dip), ddi_get_instance(dip)); 420 (void) pm_busy_component(dip, 0); 421 pwr_p->pwr_flags |= PCIE_PM_BUSY; 422 break; 423 } 424 /* 425 * Check conditions for marking idle. 426 * If our lowest possible level is less than our current 427 * level mark idle. Mark idle only if it is not already done. 428 */ 429 if ((level_allowed < pwr_p->pwr_func_lvl) && 430 (pwr_p->pwr_hold == 0) && 431 (pwr_p->pwr_flags & PCIE_PM_BUSY)) { 432 /* 433 * For pci express, we should check here whether 434 * the link is in L1 state or not. 435 */ 436 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n", 437 ddi_driver_name(dip), ddi_get_instance(dip)); 438 (void) pm_idle_component(dip, 0); 439 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 440 break; 441 } 442 break; 443 444 default: 445 mutex_exit(&pwr_p->pwr_lock); 446 return (pm_busop_bus_power(dip, impl_arg, op, arg, result)); 447 } 448 mutex_exit(&pwr_p->pwr_lock); 449 return (rv); 450 } 451 452 /* 453 * Decrement the count of children at olevel by one and increment 454 * count of children at nlevel by one. 455 */ 456 static void 457 pwr_update_counters(int *countersp, int olevel, int nlevel) 458 { 459 uint32_t index; 460 461 ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0); 462 ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0); 463 464 index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel); 465 countersp[index]--; 466 index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel); 467 countersp[index]++; 468 } 469 470 /* 471 * Returns the lowest possible power level allowed for nexus 472 * based on children's power level. Lowest possible level is 473 * equal to the highest level among the children. 
It also checks 474 * for the supported level 475 * UNKNOWN = D0 > D1 > D2 > D3 476 */ 477 static int 478 pwr_level_allowed(pcie_pwr_t *pwr_p) 479 { 480 int *counters = pwr_p->pwr_counters; 481 int i, j; 482 483 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 484 /* 485 * Search from UNKNOWN to D2. unknown is same as D0. 486 * find the highest level among the children. If that 487 * level is supported, return that level. If not, 488 * find the next higher supported level and return that 489 * level. For example, if the D1 is the highest among 490 * children and if D1 isn't supported return D0 as the 491 * lowest possible level. We don't need to look at D3 492 * as that is the default lowest level and it is always 493 * supported. 494 */ 495 for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) { 496 if (counters[i]) { 497 if (i == PCIE_UNKNOWN_INDEX) 498 return (PM_LEVEL_D0); 499 /* 500 * i is the highest level among children. If this is 501 * supported, return i. 502 */ 503 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i)) 504 return (i); 505 /* find the next higher supported level */ 506 for (j = i + 1; j <= PCIE_D0_INDEX; j++) { 507 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j)) 508 return (j); 509 } 510 } 511 } 512 513 return (PM_LEVEL_D3); 514 } 515 516 /* 517 * Update the counters with number pm components of the child 518 * all components are assumed to be at UNKNOWN level. 
519 */ 520 static void 521 pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) 522 { 523 int comps = PM_NUMCMPTS(cdip); 524 pcie_pm_t *pcie_pm_p; 525 pcie_pwr_child_t *cpwr_p; 526 527 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 528 if (!comps) 529 return; 530 531 PCIE_DBG("%s(%d): pcie_add_comps: unknown level counter incremented " 532 "from %d by %d because of %s@%d\n", 533 ddi_driver_name(dip), ddi_get_instance(dip), 534 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps, 535 ddi_driver_name(cdip), ddi_get_instance(cdip)); 536 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps; 537 /* 538 * Allocate counters per child. This is a part of pcie 539 * pm info. If there is no pcie pm info, allocate it here. 540 * pcie pm info might already be there for pci express nexus 541 * driver e.g. pcieb. For all leaf nodes, it is allocated here. 542 */ 543 if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) { 544 pcie_pm_p = (pcie_pm_t *)kmem_zalloc( 545 sizeof (pcie_pm_t), KM_SLEEP); 546 PCIE_SET_PMINFO(cdip, pcie_pm_p); 547 } 548 cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t), 549 KM_SLEEP); 550 pcie_pm_p->pcie_par_pminfo = cpwr_p; 551 (cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps; 552 } 553 554 /* 555 * Remove the pm components of a child from our counters. 556 */ 557 static void 558 pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p) 559 { 560 int i; 561 int *child_counters; 562 563 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 564 if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) { 565 if (PCIE_SUPPORTS_DEVICE_PM(dip)) { 566 /* 567 * Driver never made a PM call and we didn't create 568 * any counters for this device. This also means that 569 * hold made at the PRE_ATTACH time, still remains. 570 * Remove the hold now. The correct thing to do is to 571 * stay at full power when a child is at full power 572 * whether a driver is there or not. This will be 573 * implemented in the future. 
574 */ 575 pcie_pm_subrelease(dip, pwr_p); 576 } 577 return; 578 } 579 PCIE_DBG("%s(%d): pcie_remove_comps:counters decremented because of " 580 "%s@%d\n", ddi_driver_name(dip), ddi_get_instance(dip), 581 ddi_driver_name(cdip), ddi_get_instance(cdip)); 582 child_counters = PCIE_CHILD_COUNTERS(cdip); 583 /* 584 * Adjust the nexus counters. No need to adjust per child dip 585 * counters as we are freeing the per child dip info. 586 */ 587 for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) { 588 ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]); 589 (pwr_p->pwr_counters)[i] -= child_counters[i]; 590 } 591 /* remove both parent pm info and pcie pminfo itself */ 592 kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t)); 593 kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t)); 594 PCIE_RESET_PMINFO(cdip); 595 } 596 597 /* 598 * Power management related initialization common to px and pcieb 599 */ 600 int 601 pwr_common_setup(dev_info_t *dip) 602 { 603 pcie_pm_t *pcie_pm_p; 604 pcie_pwr_t *pwr_p; 605 int pminfo_created = 0; 606 607 /* Create pminfo, if it doesn't exist already */ 608 if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) { 609 pcie_pm_p = (pcie_pm_t *)kmem_zalloc( 610 sizeof (pcie_pm_t), KM_SLEEP); 611 PCIE_SET_PMINFO(dip, pcie_pm_p); 612 pminfo_created = 1; 613 } 614 pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP); 615 mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL); 616 /* Initialize the power level and default level support */ 617 pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN; 618 pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED; 619 620 if (pcie_plat_pwr_setup(dip) != DDI_SUCCESS) 621 goto pwr_common_err; 622 623 pcie_pm_p->pcie_pwr_p = pwr_p; 624 return (DDI_SUCCESS); 625 626 pwr_common_err: 627 mutex_destroy(&pwr_p->pwr_lock); 628 kmem_free(pwr_p, sizeof (pcie_pwr_t)); 629 if (pminfo_created) { 630 PCIE_RESET_PMINFO(dip); 631 kmem_free(pcie_pm_p, sizeof (pcie_pm_t)); 632 } 633 return (DDI_FAILURE); 634 635 } 636 637 /* 638 * Undo whatever is 
done in pwr_common_setup. Called by px_detach or pxb_detach 639 */ 640 void 641 pwr_common_teardown(dev_info_t *dip) 642 { 643 pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip); 644 pcie_pwr_t *pwr_p; 645 646 if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 647 return; 648 649 pcie_plat_pwr_teardown(dip); 650 mutex_destroy(&pwr_p->pwr_lock); 651 pcie_pm_p->pcie_pwr_p = NULL; 652 kmem_free(pwr_p, sizeof (pcie_pwr_t)); 653 /* 654 * If the parent didn't store have any pm info about 655 * this node, that means parent doesn't need pminfo when it handles 656 * POST_DETACH for this node. For example, if dip is the dip of 657 * root complex, then there is no parent pm info. 658 */ 659 if (!PCIE_PAR_PMINFO(dip)) { 660 kmem_free(pcie_pm_p, sizeof (pcie_pm_t)); 661 PCIE_RESET_PMINFO(dip); 662 } 663 } 664 665 /* 666 * Raises the power and marks itself busy. 667 */ 668 int 669 pcie_pm_hold(dev_info_t *dip) 670 { 671 pcie_pwr_t *pwr_p; 672 673 /* If no PM info or no device PM, return */ 674 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) || 675 !(PCIE_SUPPORTS_DEVICE_PM(dip))) 676 return (DDI_SUCCESS); 677 678 /* 679 * If we are not at full power, then powerup. 680 * Need to be at full power so that link can be 681 * at L0. Similarly for PCI/PCI-X bus, it should be 682 * at full power. 
683 */ 684 mutex_enter(&pwr_p->pwr_lock); 685 ASSERT(pwr_p->pwr_hold >= 0); 686 PCIE_DBG("%s(%d): pm_hold: incrementing hold \n", 687 ddi_driver_name(dip), ddi_get_instance(dip)); 688 pwr_p->pwr_hold++; 689 /* Mark itself busy, if it is not done already */ 690 if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) { 691 PCIE_DBG("%s(%d): pm_hold: marking busy\n", 692 ddi_driver_name(dip), ddi_get_instance(dip)); 693 pwr_p->pwr_flags |= PCIE_PM_BUSY; 694 (void) pm_busy_component(dip, 0); 695 } 696 if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) { 697 mutex_exit(&pwr_p->pwr_lock); 698 return (DDI_SUCCESS); 699 } 700 mutex_exit(&pwr_p->pwr_lock); 701 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) { 702 PCIE_DBG("%s(%d): pm_hold: attempt to raise power " 703 "from %d to %d failed\n", ddi_driver_name(dip), 704 ddi_get_instance(dip), pwr_p->pwr_func_lvl, 705 PM_LEVEL_D0); 706 pcie_pm_release(dip); 707 return (DDI_FAILURE); 708 } 709 return (DDI_SUCCESS); 710 } 711 712 /* 713 * Reverse the things done in pcie_pm_hold 714 */ 715 void 716 pcie_pm_release(dev_info_t *dip) 717 { 718 pcie_pwr_t *pwr_p; 719 720 /* If no PM info or no device PM, return */ 721 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) || 722 !(PCIE_SUPPORTS_DEVICE_PM(dip))) 723 return; 724 725 mutex_enter(&pwr_p->pwr_lock); 726 pcie_pm_subrelease(dip, pwr_p); 727 mutex_exit(&pwr_p->pwr_lock); 728 } 729 730 static void 731 pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p) 732 { 733 int level; 734 735 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 736 ASSERT(pwr_p->pwr_hold > 0); 737 PCIE_DBG("%s(%d): pm_subrelease: decrementing hold \n", 738 ddi_driver_name(dip), ddi_get_instance(dip)); 739 pwr_p->pwr_hold--; 740 ASSERT(pwr_p->pwr_hold >= 0); 741 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); 742 level = pwr_level_allowed(pwr_p); 743 if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) { 744 PCIE_DBG("%s(%d): pm_subrelease: marking idle \n", 745 ddi_driver_name(dip), ddi_get_instance(dip)); 746 (void) 
pm_idle_component(dip, 0); 747 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 748 } 749 } 750 751 /* 752 * Called when the child makes the first power management call. 753 * sets up the counters. All the components of the child device are 754 * assumed to be at unknown level. It also releases the power hold 755 * pwr_p - parent's pwr_t 756 * cdip - child's dip 757 */ 758 int 759 pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip) 760 { 761 pcie_pwr_t *pwr_p; 762 763 /* If no PM info, return */ 764 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 765 return (DDI_SUCCESS); 766 767 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock)); 768 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0); 769 pcie_add_comps(dip, cdip, pwr_p); 770 771 /* If no device power management then return */ 772 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) 773 return (DDI_SUCCESS); 774 775 /* 776 * We have informed PM that we are busy at PRE_ATTACH time for 777 * this child. Release the hold and but don't clear the busy bit. 778 * If a device never changes power, hold will not be released 779 * and we stay at full power. 780 */ 781 ASSERT(pwr_p->pwr_hold > 0); 782 PCIE_DBG("%s(%d): pm_add_child: decrementing hold \n", 783 ddi_driver_name(dip), ddi_get_instance(dip)); 784 pwr_p->pwr_hold--; 785 /* 786 * We must have made sure that busy bit 787 * is set when we put the hold 788 */ 789 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY); 790 return (DDI_SUCCESS); 791 } 792 793 /* 794 * Adjust the counters when a child detaches 795 * Marks itself idle if the idle conditions are met. 
796 * Called at POST_DETACH time 797 */ 798 int 799 pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip) 800 { 801 int *counters; 802 int total; 803 pcie_pwr_t *pwr_p; 804 805 /* If no PM info, return */ 806 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip))) 807 return (DDI_SUCCESS); 808 809 counters = pwr_p->pwr_counters; 810 mutex_enter(&pwr_p->pwr_lock); 811 pcie_remove_comps(dip, cdip, pwr_p); 812 /* If no device power management then return */ 813 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) { 814 mutex_exit(&pwr_p->pwr_lock); 815 return (DDI_SUCCESS); 816 } 817 total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] + 818 counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] + 819 counters[PCIE_D3_INDEX]); 820 /* 821 * Mark idle if either there are no children or our lowest 822 * possible level is less than the current level. Mark idle 823 * only if it is not already done. 824 */ 825 if ((pwr_p->pwr_hold == 0) && 826 (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) { 827 if (pwr_p->pwr_flags & PCIE_PM_BUSY) { 828 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n", 829 ddi_driver_name(dip), ddi_get_instance(dip)); 830 (void) pm_idle_component(dip, 0); 831 pwr_p->pwr_flags &= ~PCIE_PM_BUSY; 832 } 833 } 834 mutex_exit(&pwr_p->pwr_lock); 835 return (DDI_SUCCESS); 836 } 837 838 boolean_t 839 pcie_is_pcie(dev_info_t *dip) 840 { 841 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip); 842 ASSERT(bus_p); 843 return (bus_p->bus_pcie_off != 0); 844 } 845 846 /* 847 * Called by px_attach or pcieb_attach:: DDI_RESUME 848 */ 849 int 850 pcie_pwr_resume(dev_info_t *dip) 851 { 852 dev_info_t *cdip; 853 pcie_pwr_t *pwr_p = NULL; 854 855 #if defined(__i386) || defined(__amd64) 856 if (dip) 857 return (DDI_SUCCESS); 858 #endif /* defined(__i386) || defined(__amd64) */ 859 860 if (PCIE_PMINFO(dip)) 861 pwr_p = PCIE_NEXUS_PMINFO(dip); 862 863 if (pwr_p) { 864 /* Inform the PM framework that dip is at full power */ 865 if (PCIE_SUPPORTS_DEVICE_PM(dip)) { 866 
ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0); 867 (void) pm_raise_power(dip, 0, 868 pwr_p->pwr_func_lvl); 869 } 870 } 871 872 /* 873 * Code taken from pci driver. 874 * Restore config registers for children that did not save 875 * their own registers. Children pwr states are UNKNOWN after 876 * a resume since it is possible for the PM framework to call 877 * resume without an actual power cycle. (ie if suspend fails). 878 */ 879 for (cdip = ddi_get_child(dip); cdip != NULL; 880 cdip = ddi_get_next_sibling(cdip)) { 881 boolean_t is_pcie; 882 883 /* 884 * Not interested in children who are not already 885 * init'ed. They will be set up by init_child(). 886 */ 887 if (i_ddi_node_state(cdip) < DS_INITIALIZED) { 888 PCIE_DBG("%s(%d): " 889 "DDI_RESUME: skipping %s%d not in CF1\n", 890 ddi_driver_name(dip), ddi_get_instance(dip), 891 ddi_driver_name(cdip), ddi_get_instance(cdip)); 892 continue; 893 } 894 895 /* 896 * Only restore config registers if saved by nexus. 897 */ 898 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 899 "nexus-saved-config-regs") != 1) 900 continue; 901 902 PCIE_DBG("%s(%d): " 903 "DDI_RESUME: nexus restoring %s%d config regs\n", 904 ddi_driver_name(dip), ddi_get_instance(dip), 905 ddi_driver_name(cdip), ddi_get_instance(cdip)); 906 907 /* clear errors left by OBP scrubbing */ 908 pcie_clear_errors(cdip); 909 910 /* PCIe workaround: disable errors during 4K config resore */ 911 if (is_pcie = pcie_is_pcie(cdip)) 912 pcie_disable_errors(cdip); 913 (void) pci_restore_config_regs(cdip); 914 if (is_pcie) { 915 pcie_enable_errors(cdip); 916 (void) pcie_enable_ce(cdip); 917 } 918 919 if (ndi_prop_remove(DDI_DEV_T_NONE, cdip, 920 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { 921 PCIE_DBG("%s(%d): %s%d can't remove prop %s", 922 ddi_driver_name(dip), ddi_get_instance(dip), 923 ddi_driver_name(cdip), ddi_get_instance(cdip), 924 "nexus-saved-config-regs"); 925 } 926 } 927 return (DDI_SUCCESS); 928 } 929 930 /* 931 * Called by pcie_detach or 
pcieb_detach:: DDI_SUSPEND 932 */ 933 int 934 pcie_pwr_suspend(dev_info_t *dip) 935 { 936 dev_info_t *cdip; 937 int i, *counters; /* per nexus counters */ 938 int *child_counters = NULL; /* per child dip counters */ 939 pcie_pwr_t *pwr_p = NULL; 940 941 #if defined(__i386) || defined(__amd64) 942 if (dip) 943 return (DDI_SUCCESS); 944 #endif /* defined(__i386) || defined(__amd64) */ 945 946 if (PCIE_PMINFO(dip)) 947 pwr_p = PCIE_NEXUS_PMINFO(dip); 948 949 /* 950 * Mark all children to be unknown and bring our power level 951 * to full, if required. This is to avoid any panics while 952 * accessing the child's config space. 953 */ 954 if (pwr_p) { 955 mutex_enter(&pwr_p->pwr_lock); 956 if (PCIE_SUPPORTS_DEVICE_PM(dip) && 957 pwr_p->pwr_func_lvl != PM_LEVEL_D0) { 958 mutex_exit(&pwr_p->pwr_lock); 959 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != 960 DDI_SUCCESS) { 961 PCIE_DBG("%s(%d): pwr_suspend: attempt " 962 "to raise power from %d to %d " 963 "failed\n", ddi_driver_name(dip), 964 ddi_get_instance(dip), pwr_p->pwr_func_lvl, 965 PM_LEVEL_D0); 966 return (DDI_FAILURE); 967 } 968 mutex_enter(&pwr_p->pwr_lock); 969 } 970 counters = pwr_p->pwr_counters; 971 /* 972 * Update the nexus counters. At the resume time all 973 * components are considered to be at unknown level. Use the 974 * fact that counters for unknown level are at the end. 975 */ 976 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) { 977 counters[PCIE_UNKNOWN_INDEX] += counters[i]; 978 counters[i] = 0; 979 } 980 mutex_exit(&pwr_p->pwr_lock); 981 } 982 983 /* 984 * Code taken from pci driver. 985 * Save the state of the configuration headers of child 986 * nodes. 987 */ 988 for (cdip = ddi_get_child(dip); cdip != NULL; 989 cdip = ddi_get_next_sibling(cdip)) { 990 boolean_t is_pcie; 991 992 /* 993 * Not interested in children who are not already 994 * init'ed. They will be set up in init_child(). 
995 */ 996 if (i_ddi_node_state(cdip) < DS_INITIALIZED) { 997 PCIE_DBG("%s(%d): DDI_SUSPEND: skipping " 998 "%s%d not in CF1\n", ddi_driver_name(dip), 999 ddi_get_instance(dip), ddi_driver_name(cdip), 1000 ddi_get_instance(cdip)); 1001 continue; 1002 } 1003 /* 1004 * Update per child dip counters, if any. Counters 1005 * will not exist if the child is not power manageable 1006 * or if its power entry is never invoked. 1007 */ 1008 if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip)) 1009 child_counters = PCIE_CHILD_COUNTERS(cdip); 1010 if (child_counters && pwr_p) { 1011 mutex_enter(&pwr_p->pwr_lock); 1012 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) { 1013 child_counters[PCIE_UNKNOWN_INDEX] += 1014 child_counters[i]; 1015 child_counters[i] = 0; 1016 } 1017 mutex_exit(&pwr_p->pwr_lock); 1018 } 1019 1020 /* 1021 * Only save config registers if not already saved by child. 1022 */ 1023 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1024 SAVED_CONFIG_REGS) == 1) { 1025 continue; 1026 } 1027 1028 /* 1029 * The nexus needs to save config registers. Create a property 1030 * so it knows to restore on resume. 1031 */ 1032 if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip, 1033 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) { 1034 PCIE_DBG("%s(%d): %s%d can't update prop %s", 1035 ddi_driver_name(dip), ddi_get_instance(dip), 1036 ddi_driver_name(cdip), ddi_get_instance(cdip), 1037 "nexus-saved-config-regs"); 1038 } 1039 PCIE_DBG("%s(%d): DDI_SUSPEND: saving config space for" 1040 " %s%d\n", ddi_driver_name(dip), ddi_get_instance(dip), 1041 ddi_driver_name(cdip), ddi_get_instance(cdip)); 1042 1043 /* PCIe workaround: disable errors during 4K config save */ 1044 if (is_pcie = pcie_is_pcie(cdip)) 1045 pcie_disable_errors(cdip); 1046 (void) pci_save_config_regs(cdip); 1047 if (is_pcie) { 1048 pcie_enable_errors(cdip); 1049 (void) pcie_enable_ce(cdip); 1050 } 1051 } 1052 return (DDI_SUCCESS); 1053 } 1054 1055 #ifdef DEBUG 1056 /* 1057 * Description of bus_power_op. 
1058 */ 1059 typedef struct pcie_buspwr_desc { 1060 pm_bus_power_op_t pwr_op; 1061 char *pwr_desc; 1062 } pcie_buspwr_desc_t; 1063 1064 static pcie_buspwr_desc_t pcie_buspwr_desc[] = { 1065 {BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"}, 1066 {BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"}, 1067 {BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"}, 1068 {BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"}, 1069 {BUS_POWER_HAS_CHANGED, "HAS_CHANGED"}, 1070 {BUS_POWER_NOINVOL, "NOINVOL"}, 1071 {-1, NULL} 1072 }; 1073 1074 /* 1075 * Returns description of the bus_power_op. 1076 */ 1077 static char * 1078 pcie_decode_pwr_op(pm_bus_power_op_t op) 1079 { 1080 pcie_buspwr_desc_t *descp = pcie_buspwr_desc; 1081 1082 for (; descp->pwr_desc; descp++) { 1083 if (op == descp->pwr_op) 1084 return (descp->pwr_desc); 1085 } 1086 return ("UNKNOWN OP"); 1087 } 1088 #endif 1089