1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/kmem.h>
29 #include <sys/sysmacros.h>
30 #include <sys/sunddi.h>
31 #include <sys/sunpm.h>
32 #include <sys/epm.h>
33 #include <sys/sunndi.h>
34 #include <sys/ddi_impldefs.h>
35 #include <sys/ddi_implfuncs.h>
36 #include <sys/pcie.h>
37 #include <sys/pcie_impl.h>
38 #include <sys/promif.h> /* prom_printf */
39 #include <sys/pcie_pwr.h>
40
41 /*
42 * This file implements the power management functionality for
43 * pci express switch and pci express-to-pci/pci-x bridge. All the
44 * code in this file is generic and is not specific to a particular chip.
45 * The algorithm, which decides when to go to a lower power is explained
46 * below:
47 *
48 * 1. Initially when no children are attached, the driver is idle from
49 * PM framework point of view ( PM idle/PM busy).
50 *
51 * 2. Driver is PM busy if either a reference count called pwr_hold is
52 * greater than zero or driver is already at the lowest possible power
53 * level. The lowest possible power level for the driver is equal to the
54 * highest power level among its children. The PM busy condition is
55 * indicated by PCIE_PM_BUSY bit. At any point, only one pm_busy_component
56 * call is made for a nexus driver instance.
57 *
58 * 3. Driver is PM idle if the pwr_hold is zero and the lowest
59 * possible power level is less than the driver's current power level.
60 * At any point, only one pm_idle_component call is made for a nexus
61 * driver instance.
62 *
 * 4. For any events like child attach, it increments pwr_hold and marks
 * itself busy, if it is not already done so. This temporary hold is
 * removed when the event is complete.
66 *
67 * 5. Any child's power change requires the parent (this driver) to be
68 * full power. So it raises its power and increments pwr_hold. It also
69 * marks itself temporarily busy, if it is not already done. This hold
70 * is removed when the child power change is complete.
71 *
72 * 6. After each child power change, it evaluates what is the lowest
73 * possible power level. If the lowest possible power level is less than
74 * the current power level and pwr_hold is zero, then it marks itself
75 * idle. The lowest power level is equal or greater than the highest level
76 * among the children. It keeps track of children's power level by
77 * using counters.
78 *
79 * 7. Any code e.g., which is accessing the driver's own registers should
80 * place a temporary hold using pcie_pm_hold.
81 */
82
83 static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new);
84 static void pwr_update_counters(int *countersp, int olevel, int nlevel);
85 static int pwr_level_allowed(pcie_pwr_t *pwr_p);
86 static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip,
87 pcie_pwr_t *pwr_p);
88 static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip,
89 pcie_pwr_t *pwr_p);
90 static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p);
91 static boolean_t pcie_is_pcie(dev_info_t *dip);
92 #ifdef DEBUG
93 static char *pcie_decode_pwr_op(pm_bus_power_op_t op);
94 #else
95 #define pcie_decode_pwr_op
96 #endif
97
98 /*
99 * power entry point.
100 *
101 * This function decides whether the PM request is honorable.
102 * If yes, it then does what's necessary for switch or
103 * bridge to change its power.
104 */
105 /* ARGSUSED */
106 int
pcie_power(dev_info_t * dip,int component,int level)107 pcie_power(dev_info_t *dip, int component, int level)
108 {
109 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
110 int *counters = pwr_p->pwr_counters;
111 int pmcaps = pwr_p->pwr_pmcaps;
112 int ret = DDI_FAILURE;
113
114 #if defined(__i386) || defined(__amd64)
115 if (dip)
116 return (DDI_SUCCESS);
117 #endif /* defined(__i386) || defined(__amd64) */
118
119 ASSERT(level != PM_LEVEL_UNKNOWN);
120 /* PM should not asking for a level, which is unsupported */
121 ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 ||
122 (level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) ||
123 (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2)));
124
125 mutex_enter(&pwr_p->pwr_lock);
126 PCIE_DBG("%s(%d): pcie_power: change from %d to %d\n",
127 ddi_driver_name(dip), ddi_get_instance(dip), pwr_p->pwr_func_lvl,
128 level);
129 if (pwr_p->pwr_func_lvl == level) {
130 PCIE_DBG("%s(%d): pcie_power: already at %d\n",
131 ddi_driver_name(dip), ddi_get_instance(dip), level);
132 ret = DDI_SUCCESS;
133 goto pcie_pwr_done;
134 }
135
136 if (level < pwr_p->pwr_func_lvl) {
137 /*
138 * Going to lower power. Reject this if we are either busy
139 * or there is a hold.
140 */
141 if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
142 PCIE_DBG("%s(%d): pcie_power: rejecting change to %d "
143 "as busy\n", ddi_driver_name(dip),
144 ddi_get_instance(dip), level);
145 goto pcie_pwr_done;
146 }
147
148 /*
149 * Now we know that we are neither busy nor there is a hold.
150 * At this point none of the children should be at full power.
151 * Reject the request if level reqested is lower than the level
152 * possible.
153 */
154 ASSERT(!counters[PCIE_D0_INDEX] &&
155 !counters[PCIE_UNKNOWN_INDEX]);
156 if (level < pwr_level_allowed(pwr_p)) {
157 PCIE_DBG("%s(%d): pcie_power: rejecting level %d as"
158 " %d is the lowest possible\n",
159 ddi_driver_name(dip), ddi_get_instance(dip), level,
160 pwr_level_allowed(pwr_p));
161 goto pcie_pwr_done;
162 }
163 }
164
165 if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) {
166 PCIE_DBG("%s(%d): pcie_power: attempt to change to %d "
167 " failed \n", ddi_driver_name(dip), ddi_get_instance(dip),
168 level);
169 goto pcie_pwr_done;
170 }
171 pwr_p->pwr_func_lvl = level;
172 PCIE_DBG("%s(%d): pcie_power: level changed to %d \n",
173 ddi_driver_name(dip), ddi_get_instance(dip), level);
174 ret = DDI_SUCCESS;
175
176 pcie_pwr_done:
177 mutex_exit(&pwr_p->pwr_lock);
178 return (ret);
179 }
180
181 /*
182 * Called by pcie_power() only. Caller holds the pwr_lock.
183 *
184 * dip - dev_info pointer
185 * pwr_p - pm info for the node.
186 * new - new level
187 */
188 static int
pcie_pwr_change(dev_info_t * dip,pcie_pwr_t * pwr_p,int new)189 pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new)
190 {
191 uint16_t pmcsr;
192
193 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
194 ASSERT(new != pwr_p->pwr_func_lvl);
195 pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
196 pmcsr &= ~PCI_PMCSR_STATE_MASK;
197 switch (new) {
198 case PM_LEVEL_D0:
199 pmcsr |= PCI_PMCSR_D0;
200 break;
201
202 case PM_LEVEL_D1:
203 pmcsr |= PCI_PMCSR_D1;
204 break;
205
206 case PM_LEVEL_D2:
207 pmcsr |= PCI_PMCSR_D2;
208 break;
209
210 case PM_LEVEL_D3:
211 pmcsr |= PCI_PMCSR_D3HOT;
212 break;
213
214 default:
215 ASSERT(0);
216 break;
217 }
218 /* Save config space, if going to D3 */
219 if (new == PM_LEVEL_D3) {
220 PCIE_DBG("%s(%d): pwr_change: saving config space regs\n",
221 ddi_driver_name(dip), ddi_get_instance(dip));
222 if (pci_save_config_regs(dip) != DDI_SUCCESS) {
223 PCIE_DBG("%s(%d): pcie_pwr_change: failed to save "
224 "config space regs\n", ddi_driver_name(dip),
225 ddi_get_instance(dip));
226 return (DDI_FAILURE);
227 }
228 }
229
230 pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr);
231
232 /*
233 * TBD: Taken from pci_pci driver. Is this required?
234 * No bus transactions should occur without waiting for
235 * settle time specified in PCI PM spec rev 2.1 sec 5.6.1
236 * To make things simple, just use the max time specified for
237 * all state transitions.
238 */
239 delay(drv_usectohz(PCI_CLK_SETTLE_TIME));
240
241 /*
242 * Restore config space if coming out of D3
243 */
244 if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) {
245 PCIE_DBG("%s(%d): pcie_pwr_change: restoring config space\n",
246 ddi_driver_name(dip), ddi_get_instance(dip));
247 if (pci_restore_config_regs(dip) != DDI_SUCCESS) {
248 PCIE_DBG("%s(%d): pcie_pwr_change: failed to restore "
249 "config space regs\n", ddi_driver_name(dip),
250 ddi_get_instance(dip));
251 return (DDI_FAILURE);
252 }
253 }
254 return (DDI_SUCCESS);
255 }
256
257 /*
258 * bus_ctlops.bus_power function.
259 *
260 * This function handles PRE_ POST_ change notifications, sent by
261 * PM framework related to child's power level change. It marks itself
262 * idle or busy based on the children's power level.
263 */
264 int
pcie_bus_power(dev_info_t * dip,void * impl_arg,pm_bus_power_op_t op,void * arg,void * result)265 pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
266 void *arg, void *result)
267 {
268 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
269 int *counters = pwr_p->pwr_counters; /* nexus counters */
270 int *child_counters; /* per child dip counters */
271 pm_bp_child_pwrchg_t *bpc;
272 pm_bp_has_changed_t *bphc;
273 dev_info_t *cdip;
274 int new_level;
275 int old_level;
276 int rv = DDI_SUCCESS;
277 int level_allowed, comp;
278
279 #if defined(__i386) || defined(__amd64)
280 if (dip)
281 return (DDI_SUCCESS);
282 #endif /* defined(__i386) || defined(__amd64) */
283
284 switch (op) {
285 case BUS_POWER_PRE_NOTIFICATION:
286 case BUS_POWER_POST_NOTIFICATION:
287 bpc = (pm_bp_child_pwrchg_t *)arg;
288 cdip = bpc->bpc_dip;
289 new_level = bpc->bpc_nlevel;
290 old_level = bpc->bpc_olevel;
291 comp = bpc->bpc_comp;
292 break;
293
294 case BUS_POWER_HAS_CHANGED:
295 bphc = (pm_bp_has_changed_t *)arg;
296 cdip = bphc->bphc_dip;
297 new_level = bphc->bphc_nlevel;
298 old_level = bphc->bphc_olevel;
299 comp = bphc->bphc_comp;
300 break;
301
302 default:
303 break;
304
305 }
306
307 ASSERT(pwr_p);
308 mutex_enter(&pwr_p->pwr_lock);
309 switch (op) {
310 case BUS_POWER_PRE_NOTIFICATION:
311 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
312 ddi_driver_name(dip), ddi_get_instance(dip),
313 ddi_driver_name(cdip), ddi_get_instance(cdip),
314 pcie_decode_pwr_op(op), old_level, new_level);
315 /*
316 * If the nexus doesn't want the child to go into
317 * non-D0 state, mark the child busy. This way PM
318 * framework will never try to lower the child's power.
319 * In case of pm_lower_power, marking busy won't help.
320 * So we need to specifically reject the attempt to
321 * go to non-D0 state.
322 */
323 if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) {
324 if (!PCIE_IS_COMPS_COUNTED(cdip)) {
325 PCIE_DBG("%s(%d): pcie_bus_power: marking "
326 "child busy to disable pm \n",
327 ddi_driver_name(dip),
328 ddi_get_instance(dip));
329 (void) pm_busy_component(cdip, 0);
330 }
331 if (new_level < PM_LEVEL_D0 && !comp) {
332 PCIE_DBG("%s(%d): pcie_bus_power: rejecting "
333 "child's attempt to go to %d\n",
334 ddi_driver_name(dip), ddi_get_instance(dip),
335 new_level);
336 rv = DDI_FAILURE;
337 }
338 }
339 mutex_exit(&pwr_p->pwr_lock);
340 if (rv == DDI_SUCCESS)
341 rv = pcie_pm_hold(dip);
342 return (rv);
343
344 case BUS_POWER_HAS_CHANGED:
345 case BUS_POWER_POST_NOTIFICATION:
346 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
347 ddi_driver_name(dip), ddi_get_instance(dip),
348 ddi_driver_name(cdip), ddi_get_instance(cdip),
349 pcie_decode_pwr_op(op), old_level, new_level);
350 /*
351 * Child device power changed
352 * If pm components of this child aren't accounted for
353 * then add the components to the counters. This can't
354 * be done in POST_ATTACH ctlop as pm info isn't created
355 * by then. Also because a driver can make a pm call during
356 * the attach.
357 */
358 if (!PCIE_IS_COMPS_COUNTED(cdip)) {
359 (void) pcie_pm_add_child(dip, cdip);
360 if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) &&
361 (op == BUS_POWER_HAS_CHANGED)) {
362 PCIE_DBG("%s(%d): pcie_bus_power: marking "
363 "child busy to disable pm \n",
364 ddi_driver_name(dip),
365 ddi_get_instance(dip));
366 (void) pm_busy_component(cdip, 0);
367 /*
368 * If the driver has already changed to lower
369 * power(pm_power_has_changed) on its own,
370 * there is nothing we can do other than
371 * logging the warning message on the console.
372 */
373 if (new_level < PM_LEVEL_D0)
374 cmn_err(CE_WARN, "!Downstream device "
375 "%s@%d went to non-D0 state: "
376 "possible loss of link\n",
377 ddi_driver_name(cdip),
378 ddi_get_instance(cdip));
379 }
380 }
381
382
383 /*
384 * If it is POST and device PM is supported, release the
385 * hold done in PRE.
386 */
387 if (op == BUS_POWER_POST_NOTIFICATION &&
388 PCIE_SUPPORTS_DEVICE_PM(dip)) {
389 pcie_pm_subrelease(dip, pwr_p);
390 }
391
392 if (*((int *)result) == DDI_FAILURE) {
393 PCIE_DBG("%s(%d): pcie_bus_power: change for %s%d "
394 "failed\n", ddi_driver_name(dip),
395 ddi_get_instance(dip), ddi_driver_name(cdip),
396 ddi_get_instance(cdip));
397 break;
398 }
399 /* Modify counters appropriately */
400 pwr_update_counters(counters, old_level, new_level);
401
402 child_counters = PCIE_CHILD_COUNTERS(cdip);
403 pwr_update_counters(child_counters, old_level, new_level);
404
405 /* If no device PM, return */
406 if (!PCIE_SUPPORTS_DEVICE_PM(dip))
407 break;
408
409 level_allowed = pwr_level_allowed(pwr_p);
410 /*
411 * Check conditions for marking busy
412 * Check the flag to set this busy only once for multiple
413 * busy conditions. Mark busy if our current lowest possible
414 * is equal or greater to the current level.
415 */
416 if (level_allowed >= pwr_p->pwr_func_lvl &&
417 !(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
418 PCIE_DBG("%s(%d): pcie_bus_power: marking busy\n",
419 ddi_driver_name(dip), ddi_get_instance(dip));
420 (void) pm_busy_component(dip, 0);
421 pwr_p->pwr_flags |= PCIE_PM_BUSY;
422 break;
423 }
424 /*
425 * Check conditions for marking idle.
426 * If our lowest possible level is less than our current
427 * level mark idle. Mark idle only if it is not already done.
428 */
429 if ((level_allowed < pwr_p->pwr_func_lvl) &&
430 (pwr_p->pwr_hold == 0) &&
431 (pwr_p->pwr_flags & PCIE_PM_BUSY)) {
432 /*
433 * For pci express, we should check here whether
434 * the link is in L1 state or not.
435 */
436 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
437 ddi_driver_name(dip), ddi_get_instance(dip));
438 (void) pm_idle_component(dip, 0);
439 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
440 break;
441 }
442 break;
443
444 default:
445 mutex_exit(&pwr_p->pwr_lock);
446 return (pm_busop_bus_power(dip, impl_arg, op, arg, result));
447 }
448 mutex_exit(&pwr_p->pwr_lock);
449 return (rv);
450 }
451
452 /*
453 * Decrement the count of children at olevel by one and increment
454 * count of children at nlevel by one.
455 */
456 static void
pwr_update_counters(int * countersp,int olevel,int nlevel)457 pwr_update_counters(int *countersp, int olevel, int nlevel)
458 {
459 uint32_t index;
460
461 ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0);
462 ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0);
463
464 index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel);
465 countersp[index]--;
466 index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel);
467 countersp[index]++;
468 }
469
470 /*
471 * Returns the lowest possible power level allowed for nexus
472 * based on children's power level. Lowest possible level is
473 * equal to the highest level among the children. It also checks
474 * for the supported level
475 * UNKNOWN = D0 > D1 > D2 > D3
476 */
477 static int
pwr_level_allowed(pcie_pwr_t * pwr_p)478 pwr_level_allowed(pcie_pwr_t *pwr_p)
479 {
480 int *counters = pwr_p->pwr_counters;
481 int i, j;
482
483 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
484 /*
485 * Search from UNKNOWN to D2. unknown is same as D0.
486 * find the highest level among the children. If that
487 * level is supported, return that level. If not,
488 * find the next higher supported level and return that
489 * level. For example, if the D1 is the highest among
490 * children and if D1 isn't supported return D0 as the
491 * lowest possible level. We don't need to look at D3
492 * as that is the default lowest level and it is always
493 * supported.
494 */
495 for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) {
496 if (counters[i]) {
497 if (i == PCIE_UNKNOWN_INDEX)
498 return (PM_LEVEL_D0);
499 /*
500 * i is the highest level among children. If this is
501 * supported, return i.
502 */
503 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i))
504 return (i);
505 /* find the next higher supported level */
506 for (j = i + 1; j <= PCIE_D0_INDEX; j++) {
507 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j))
508 return (j);
509 }
510 }
511 }
512
513 return (PM_LEVEL_D3);
514 }
515
516 /*
517 * Update the counters with number pm components of the child
518 * all components are assumed to be at UNKNOWN level.
519 */
520 static void
pcie_add_comps(dev_info_t * dip,dev_info_t * cdip,pcie_pwr_t * pwr_p)521 pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
522 {
523 int comps = PM_NUMCMPTS(cdip);
524 pcie_pm_t *pcie_pm_p;
525 pcie_pwr_child_t *cpwr_p;
526
527 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
528 if (!comps)
529 return;
530
531 PCIE_DBG("%s(%d): pcie_add_comps: unknown level counter incremented "
532 "from %d by %d because of %s@%d\n",
533 ddi_driver_name(dip), ddi_get_instance(dip),
534 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps,
535 ddi_driver_name(cdip), ddi_get_instance(cdip));
536 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps;
537 /*
538 * Allocate counters per child. This is a part of pcie
539 * pm info. If there is no pcie pm info, allocate it here.
540 * pcie pm info might already be there for pci express nexus
541 * driver e.g. pcieb. For all leaf nodes, it is allocated here.
542 */
543 if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) {
544 pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
545 sizeof (pcie_pm_t), KM_SLEEP);
546 PCIE_SET_PMINFO(cdip, pcie_pm_p);
547 }
548 cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t),
549 KM_SLEEP);
550 pcie_pm_p->pcie_par_pminfo = cpwr_p;
551 (cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps;
552 }
553
554 /*
555 * Remove the pm components of a child from our counters.
556 */
557 static void
pcie_remove_comps(dev_info_t * dip,dev_info_t * cdip,pcie_pwr_t * pwr_p)558 pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
559 {
560 int i;
561 int *child_counters;
562
563 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
564 if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) {
565 if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
566 /*
567 * Driver never made a PM call and we didn't create
568 * any counters for this device. This also means that
569 * hold made at the PRE_ATTACH time, still remains.
570 * Remove the hold now. The correct thing to do is to
571 * stay at full power when a child is at full power
572 * whether a driver is there or not. This will be
573 * implemented in the future.
574 */
575 pcie_pm_subrelease(dip, pwr_p);
576 }
577 return;
578 }
579 PCIE_DBG("%s(%d): pcie_remove_comps:counters decremented because of "
580 "%s@%d\n", ddi_driver_name(dip), ddi_get_instance(dip),
581 ddi_driver_name(cdip), ddi_get_instance(cdip));
582 child_counters = PCIE_CHILD_COUNTERS(cdip);
583 /*
584 * Adjust the nexus counters. No need to adjust per child dip
585 * counters as we are freeing the per child dip info.
586 */
587 for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) {
588 ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]);
589 (pwr_p->pwr_counters)[i] -= child_counters[i];
590 }
591 /* remove both parent pm info and pcie pminfo itself */
592 kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t));
593 kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t));
594 PCIE_RESET_PMINFO(cdip);
595 }
596
597 /*
598 * Power management related initialization common to px and pcieb
599 */
600 int
pwr_common_setup(dev_info_t * dip)601 pwr_common_setup(dev_info_t *dip)
602 {
603 pcie_pm_t *pcie_pm_p;
604 pcie_pwr_t *pwr_p;
605 int pminfo_created = 0;
606
607 /* Create pminfo, if it doesn't exist already */
608 if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) {
609 pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
610 sizeof (pcie_pm_t), KM_SLEEP);
611 PCIE_SET_PMINFO(dip, pcie_pm_p);
612 pminfo_created = 1;
613 }
614 pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP);
615 mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL);
616 /* Initialize the power level and default level support */
617 pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN;
618 pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED;
619
620 if (pcie_plat_pwr_setup(dip) != DDI_SUCCESS)
621 goto pwr_common_err;
622
623 pcie_pm_p->pcie_pwr_p = pwr_p;
624 return (DDI_SUCCESS);
625
626 pwr_common_err:
627 mutex_destroy(&pwr_p->pwr_lock);
628 kmem_free(pwr_p, sizeof (pcie_pwr_t));
629 if (pminfo_created) {
630 PCIE_RESET_PMINFO(dip);
631 kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
632 }
633 return (DDI_FAILURE);
634
635 }
636
637 /*
638 * Undo whatever is done in pwr_common_setup. Called by px_detach or pxb_detach
639 */
640 void
pwr_common_teardown(dev_info_t * dip)641 pwr_common_teardown(dev_info_t *dip)
642 {
643 pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip);
644 pcie_pwr_t *pwr_p;
645
646 if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
647 return;
648
649 pcie_plat_pwr_teardown(dip);
650 mutex_destroy(&pwr_p->pwr_lock);
651 pcie_pm_p->pcie_pwr_p = NULL;
652 kmem_free(pwr_p, sizeof (pcie_pwr_t));
653 /*
654 * If the parent didn't store have any pm info about
655 * this node, that means parent doesn't need pminfo when it handles
656 * POST_DETACH for this node. For example, if dip is the dip of
657 * root complex, then there is no parent pm info.
658 */
659 if (!PCIE_PAR_PMINFO(dip)) {
660 kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
661 PCIE_RESET_PMINFO(dip);
662 }
663 }
664
665 /*
666 * Raises the power and marks itself busy.
667 */
668 int
pcie_pm_hold(dev_info_t * dip)669 pcie_pm_hold(dev_info_t *dip)
670 {
671 pcie_pwr_t *pwr_p;
672
673 /* If no PM info or no device PM, return */
674 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
675 !(PCIE_SUPPORTS_DEVICE_PM(dip)))
676 return (DDI_SUCCESS);
677
678 /*
679 * If we are not at full power, then powerup.
680 * Need to be at full power so that link can be
681 * at L0. Similarly for PCI/PCI-X bus, it should be
682 * at full power.
683 */
684 mutex_enter(&pwr_p->pwr_lock);
685 ASSERT(pwr_p->pwr_hold >= 0);
686 PCIE_DBG("%s(%d): pm_hold: incrementing hold \n",
687 ddi_driver_name(dip), ddi_get_instance(dip));
688 pwr_p->pwr_hold++;
689 /* Mark itself busy, if it is not done already */
690 if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
691 PCIE_DBG("%s(%d): pm_hold: marking busy\n",
692 ddi_driver_name(dip), ddi_get_instance(dip));
693 pwr_p->pwr_flags |= PCIE_PM_BUSY;
694 (void) pm_busy_component(dip, 0);
695 }
696 if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) {
697 mutex_exit(&pwr_p->pwr_lock);
698 return (DDI_SUCCESS);
699 }
700 mutex_exit(&pwr_p->pwr_lock);
701 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) {
702 PCIE_DBG("%s(%d): pm_hold: attempt to raise power "
703 "from %d to %d failed\n", ddi_driver_name(dip),
704 ddi_get_instance(dip), pwr_p->pwr_func_lvl,
705 PM_LEVEL_D0);
706 pcie_pm_release(dip);
707 return (DDI_FAILURE);
708 }
709 return (DDI_SUCCESS);
710 }
711
712 /*
713 * Reverse the things done in pcie_pm_hold
714 */
715 void
pcie_pm_release(dev_info_t * dip)716 pcie_pm_release(dev_info_t *dip)
717 {
718 pcie_pwr_t *pwr_p;
719
720 /* If no PM info or no device PM, return */
721 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
722 !(PCIE_SUPPORTS_DEVICE_PM(dip)))
723 return;
724
725 mutex_enter(&pwr_p->pwr_lock);
726 pcie_pm_subrelease(dip, pwr_p);
727 mutex_exit(&pwr_p->pwr_lock);
728 }
729
730 static void
pcie_pm_subrelease(dev_info_t * dip,pcie_pwr_t * pwr_p)731 pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p)
732 {
733 int level;
734
735 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
736 ASSERT(pwr_p->pwr_hold > 0);
737 PCIE_DBG("%s(%d): pm_subrelease: decrementing hold \n",
738 ddi_driver_name(dip), ddi_get_instance(dip));
739 pwr_p->pwr_hold--;
740 ASSERT(pwr_p->pwr_hold >= 0);
741 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
742 level = pwr_level_allowed(pwr_p);
743 if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) {
744 PCIE_DBG("%s(%d): pm_subrelease: marking idle \n",
745 ddi_driver_name(dip), ddi_get_instance(dip));
746 (void) pm_idle_component(dip, 0);
747 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
748 }
749 }
750
751 /*
752 * Called when the child makes the first power management call.
753 * sets up the counters. All the components of the child device are
754 * assumed to be at unknown level. It also releases the power hold
755 * pwr_p - parent's pwr_t
756 * cdip - child's dip
757 */
758 int
pcie_pm_add_child(dev_info_t * dip,dev_info_t * cdip)759 pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip)
760 {
761 pcie_pwr_t *pwr_p;
762
763 /* If no PM info, return */
764 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
765 return (DDI_SUCCESS);
766
767 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
768 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
769 pcie_add_comps(dip, cdip, pwr_p);
770
771 /* If no device power management then return */
772 if (!PCIE_SUPPORTS_DEVICE_PM(dip))
773 return (DDI_SUCCESS);
774
775 /*
776 * We have informed PM that we are busy at PRE_ATTACH time for
777 * this child. Release the hold and but don't clear the busy bit.
778 * If a device never changes power, hold will not be released
779 * and we stay at full power.
780 */
781 ASSERT(pwr_p->pwr_hold > 0);
782 PCIE_DBG("%s(%d): pm_add_child: decrementing hold \n",
783 ddi_driver_name(dip), ddi_get_instance(dip));
784 pwr_p->pwr_hold--;
785 /*
786 * We must have made sure that busy bit
787 * is set when we put the hold
788 */
789 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
790 return (DDI_SUCCESS);
791 }
792
793 /*
794 * Adjust the counters when a child detaches
795 * Marks itself idle if the idle conditions are met.
796 * Called at POST_DETACH time
797 */
798 int
pcie_pm_remove_child(dev_info_t * dip,dev_info_t * cdip)799 pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip)
800 {
801 int *counters;
802 int total;
803 pcie_pwr_t *pwr_p;
804
805 /* If no PM info, return */
806 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
807 return (DDI_SUCCESS);
808
809 counters = pwr_p->pwr_counters;
810 mutex_enter(&pwr_p->pwr_lock);
811 pcie_remove_comps(dip, cdip, pwr_p);
812 /* If no device power management then return */
813 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) {
814 mutex_exit(&pwr_p->pwr_lock);
815 return (DDI_SUCCESS);
816 }
817 total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] +
818 counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] +
819 counters[PCIE_D3_INDEX]);
820 /*
821 * Mark idle if either there are no children or our lowest
822 * possible level is less than the current level. Mark idle
823 * only if it is not already done.
824 */
825 if ((pwr_p->pwr_hold == 0) &&
826 (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) {
827 if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
828 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
829 ddi_driver_name(dip), ddi_get_instance(dip));
830 (void) pm_idle_component(dip, 0);
831 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
832 }
833 }
834 mutex_exit(&pwr_p->pwr_lock);
835 return (DDI_SUCCESS);
836 }
837
838 boolean_t
pcie_is_pcie(dev_info_t * dip)839 pcie_is_pcie(dev_info_t *dip)
840 {
841 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
842 ASSERT(bus_p);
843 return (bus_p->bus_pcie_off != 0);
844 }
845
846 /*
847 * Called by px_attach or pcieb_attach:: DDI_RESUME
848 */
849 int
pcie_pwr_resume(dev_info_t * dip)850 pcie_pwr_resume(dev_info_t *dip)
851 {
852 dev_info_t *cdip;
853 pcie_pwr_t *pwr_p = NULL;
854
855 #if defined(__i386) || defined(__amd64)
856 if (dip)
857 return (DDI_SUCCESS);
858 #endif /* defined(__i386) || defined(__amd64) */
859
860 if (PCIE_PMINFO(dip))
861 pwr_p = PCIE_NEXUS_PMINFO(dip);
862
863 if (pwr_p) {
864 /* Inform the PM framework that dip is at full power */
865 if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
866 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
867 (void) pm_raise_power(dip, 0,
868 pwr_p->pwr_func_lvl);
869 }
870 }
871
872 /*
873 * Code taken from pci driver.
874 * Restore config registers for children that did not save
875 * their own registers. Children pwr states are UNKNOWN after
876 * a resume since it is possible for the PM framework to call
877 * resume without an actual power cycle. (ie if suspend fails).
878 */
879 for (cdip = ddi_get_child(dip); cdip != NULL;
880 cdip = ddi_get_next_sibling(cdip)) {
881 boolean_t is_pcie;
882
883 /*
884 * Not interested in children who are not already
885 * init'ed. They will be set up by init_child().
886 */
887 if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
888 PCIE_DBG("%s(%d): "
889 "DDI_RESUME: skipping %s%d not in CF1\n",
890 ddi_driver_name(dip), ddi_get_instance(dip),
891 ddi_driver_name(cdip), ddi_get_instance(cdip));
892 continue;
893 }
894
895 /*
896 * Only restore config registers if saved by nexus.
897 */
898 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
899 "nexus-saved-config-regs") != 1)
900 continue;
901
902 PCIE_DBG("%s(%d): "
903 "DDI_RESUME: nexus restoring %s%d config regs\n",
904 ddi_driver_name(dip), ddi_get_instance(dip),
905 ddi_driver_name(cdip), ddi_get_instance(cdip));
906
907 /* clear errors left by OBP scrubbing */
908 pcie_clear_errors(cdip);
909
910 /* PCIe workaround: disable errors during 4K config resore */
911 if (is_pcie = pcie_is_pcie(cdip))
912 pcie_disable_errors(cdip);
913 (void) pci_restore_config_regs(cdip);
914 if (is_pcie) {
915 pcie_enable_errors(cdip);
916 (void) pcie_enable_ce(cdip);
917 }
918
919 if (ndi_prop_remove(DDI_DEV_T_NONE, cdip,
920 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
921 PCIE_DBG("%s(%d): %s%d can't remove prop %s",
922 ddi_driver_name(dip), ddi_get_instance(dip),
923 ddi_driver_name(cdip), ddi_get_instance(cdip),
924 "nexus-saved-config-regs");
925 }
926 }
927 return (DDI_SUCCESS);
928 }
929
930 /*
931 * Called by pcie_detach or pcieb_detach:: DDI_SUSPEND
932 */
933 int
pcie_pwr_suspend(dev_info_t * dip)934 pcie_pwr_suspend(dev_info_t *dip)
935 {
936 dev_info_t *cdip;
937 int i, *counters; /* per nexus counters */
938 int *child_counters = NULL; /* per child dip counters */
939 pcie_pwr_t *pwr_p = NULL;
940
941 #if defined(__i386) || defined(__amd64)
942 if (dip)
943 return (DDI_SUCCESS);
944 #endif /* defined(__i386) || defined(__amd64) */
945
946 if (PCIE_PMINFO(dip))
947 pwr_p = PCIE_NEXUS_PMINFO(dip);
948
949 /*
950 * Mark all children to be unknown and bring our power level
951 * to full, if required. This is to avoid any panics while
952 * accessing the child's config space.
953 */
954 if (pwr_p) {
955 mutex_enter(&pwr_p->pwr_lock);
956 if (PCIE_SUPPORTS_DEVICE_PM(dip) &&
957 pwr_p->pwr_func_lvl != PM_LEVEL_D0) {
958 mutex_exit(&pwr_p->pwr_lock);
959 if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
960 DDI_SUCCESS) {
961 PCIE_DBG("%s(%d): pwr_suspend: attempt "
962 "to raise power from %d to %d "
963 "failed\n", ddi_driver_name(dip),
964 ddi_get_instance(dip), pwr_p->pwr_func_lvl,
965 PM_LEVEL_D0);
966 return (DDI_FAILURE);
967 }
968 mutex_enter(&pwr_p->pwr_lock);
969 }
970 counters = pwr_p->pwr_counters;
971 /*
972 * Update the nexus counters. At the resume time all
973 * components are considered to be at unknown level. Use the
974 * fact that counters for unknown level are at the end.
975 */
976 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
977 counters[PCIE_UNKNOWN_INDEX] += counters[i];
978 counters[i] = 0;
979 }
980 mutex_exit(&pwr_p->pwr_lock);
981 }
982
983 /*
984 * Code taken from pci driver.
985 * Save the state of the configuration headers of child
986 * nodes.
987 */
988 for (cdip = ddi_get_child(dip); cdip != NULL;
989 cdip = ddi_get_next_sibling(cdip)) {
990 boolean_t is_pcie;
991
992 /*
993 * Not interested in children who are not already
994 * init'ed. They will be set up in init_child().
995 */
996 if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
997 PCIE_DBG("%s(%d): DDI_SUSPEND: skipping "
998 "%s%d not in CF1\n", ddi_driver_name(dip),
999 ddi_get_instance(dip), ddi_driver_name(cdip),
1000 ddi_get_instance(cdip));
1001 continue;
1002 }
1003 /*
1004 * Update per child dip counters, if any. Counters
1005 * will not exist if the child is not power manageable
1006 * or if its power entry is never invoked.
1007 */
1008 if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip))
1009 child_counters = PCIE_CHILD_COUNTERS(cdip);
1010 if (child_counters && pwr_p) {
1011 mutex_enter(&pwr_p->pwr_lock);
1012 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
1013 child_counters[PCIE_UNKNOWN_INDEX] +=
1014 child_counters[i];
1015 child_counters[i] = 0;
1016 }
1017 mutex_exit(&pwr_p->pwr_lock);
1018 }
1019
1020 /*
1021 * Only save config registers if not already saved by child.
1022 */
1023 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1024 SAVED_CONFIG_REGS) == 1) {
1025 continue;
1026 }
1027
1028 /*
1029 * The nexus needs to save config registers. Create a property
1030 * so it knows to restore on resume.
1031 */
1032 if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip,
1033 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
1034 PCIE_DBG("%s(%d): %s%d can't update prop %s",
1035 ddi_driver_name(dip), ddi_get_instance(dip),
1036 ddi_driver_name(cdip), ddi_get_instance(cdip),
1037 "nexus-saved-config-regs");
1038 }
1039 PCIE_DBG("%s(%d): DDI_SUSPEND: saving config space for"
1040 " %s%d\n", ddi_driver_name(dip), ddi_get_instance(dip),
1041 ddi_driver_name(cdip), ddi_get_instance(cdip));
1042
1043 /* PCIe workaround: disable errors during 4K config save */
1044 if (is_pcie = pcie_is_pcie(cdip))
1045 pcie_disable_errors(cdip);
1046 (void) pci_save_config_regs(cdip);
1047 if (is_pcie) {
1048 pcie_enable_errors(cdip);
1049 (void) pcie_enable_ce(cdip);
1050 }
1051 }
1052 return (DDI_SUCCESS);
1053 }
1054
#ifdef DEBUG
/*
 * Pairs a bus_power_op with its printable name.
 */
typedef struct pcie_buspwr_desc {
	pm_bus_power_op_t pwr_op;
	char *pwr_desc;
} pcie_buspwr_desc_t;

/* Lookup table; the entry with a NULL name terminates it. */
static pcie_buspwr_desc_t pcie_buspwr_desc[] = {
	{BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"},
	{BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"},
	{BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"},
	{BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"},
	{BUS_POWER_HAS_CHANGED, "HAS_CHANGED"},
	{BUS_POWER_NOINVOL, "NOINVOL"},
	{-1, NULL}
};

/*
 * Returns the printable name of a bus_power_op, or "UNKNOWN OP" if the
 * op is not in the table.
 */
static char *
pcie_decode_pwr_op(pm_bus_power_op_t op)
{
	int i;

	for (i = 0; pcie_buspwr_desc[i].pwr_desc != NULL; i++) {
		if (pcie_buspwr_desc[i].pwr_op == op)
			return (pcie_buspwr_desc[i].pwr_desc);
	}
	return ("UNKNOWN OP");
}
#endif
1089