1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2019 Joyent, Inc.
25 */
26
27 #include <sys/types.h>
28 #include <sys/ddi.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/sunddi.h>
32 #include <sys/sunpm.h>
33 #include <sys/epm.h>
34 #include <sys/sunndi.h>
35 #include <sys/ddi_impldefs.h>
36 #include <sys/ddi_implfuncs.h>
37 #include <sys/pcie.h>
38 #include <sys/pcie_impl.h>
39 #include <sys/promif.h> /* prom_printf */
40 #include <sys/pcie_pwr.h>
41
42 /*
43 * This file implements the power management functionality for
44 * pci express switch and pci express-to-pci/pci-x bridge. All the
45 * code in this file is generic and is not specific to a particular chip.
46 * The algorithm, which decides when to go to a lower power is explained
47 * below:
48 *
49 * 1. Initially when no children are attached, the driver is idle from
50 * PM framework point of view ( PM idle/PM busy).
51 *
52 * 2. Driver is PM busy if either a reference count called pwr_hold is
53 * greater than zero or driver is already at the lowest possible power
54 * level. The lowest possible power level for the driver is equal to the
55 * highest power level among its children. The PM busy condition is
56 * indicated by PCIE_PM_BUSY bit. At any point, only one pm_busy_component
57 * call is made for a nexus driver instance.
58 *
59 * 3. Driver is PM idle if the pwr_hold is zero and the lowest
60 * possible power level is less than the driver's current power level.
61 * At any point, only one pm_idle_component call is made for a nexus
62 * driver instance.
63 *
 * 4. For any events like child attach, it increments pwr_hold and marks
 * itself busy, if it is not already done so. This temporary hold is
 * removed when the event is complete.
67 *
68 * 5. Any child's power change requires the parent (this driver) to be
69 * full power. So it raises its power and increments pwr_hold. It also
70 * marks itself temporarily busy, if it is not already done. This hold
71 * is removed when the child power change is complete.
72 *
73 * 6. After each child power change, it evaluates what is the lowest
74 * possible power level. If the lowest possible power level is less than
75 * the current power level and pwr_hold is zero, then it marks itself
76 * idle. The lowest power level is equal or greater than the highest level
77 * among the children. It keeps track of children's power level by
78 * using counters.
79 *
80 * 7. Any code e.g., which is accessing the driver's own registers should
81 * place a temporary hold using pcie_pm_hold.
82 */
83
84 static int pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new);
85 static void pwr_update_counters(int *countersp, int olevel, int nlevel);
86 static int pwr_level_allowed(pcie_pwr_t *pwr_p);
87 static void pcie_add_comps(dev_info_t *dip, dev_info_t *cdip,
88 pcie_pwr_t *pwr_p);
89 static void pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip,
90 pcie_pwr_t *pwr_p);
91 static void pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p);
92 static boolean_t pcie_is_pcie(dev_info_t *dip);
93 #ifdef DEBUG
94 static char *pcie_decode_pwr_op(pm_bus_power_op_t op);
95 #else
96 #define pcie_decode_pwr_op
97 #endif
98
99 /*
100 * power entry point.
101 *
102 * This function decides whether the PM request is honorable.
103 * If yes, it then does what's necessary for switch or
104 * bridge to change its power.
105 */
106 /* ARGSUSED */
107 int
pcie_power(dev_info_t * dip,int component,int level)108 pcie_power(dev_info_t *dip, int component, int level)
109 {
110 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
111 int *counters = pwr_p->pwr_counters;
112 int pmcaps = pwr_p->pwr_pmcaps;
113 int ret = DDI_FAILURE;
114
115 #if defined(__x86)
116 if (dip)
117 return (DDI_SUCCESS);
118 #endif /* defined(__x86) */
119
120 ASSERT(level != PM_LEVEL_UNKNOWN);
121 /* PM should not asking for a level, which is unsupported */
122 ASSERT(level == PM_LEVEL_D0 || level == PM_LEVEL_D3 ||
123 (level == PM_LEVEL_D1 && (pmcaps & PCIE_SUPPORTS_D1)) ||
124 (level == PM_LEVEL_D2 && (pmcaps & PCIE_SUPPORTS_D2)));
125
126 mutex_enter(&pwr_p->pwr_lock);
127 PCIE_DBG("%s(%d): pcie_power: change from %d to %d\n",
128 ddi_driver_name(dip), ddi_get_instance(dip), pwr_p->pwr_func_lvl,
129 level);
130 if (pwr_p->pwr_func_lvl == level) {
131 PCIE_DBG("%s(%d): pcie_power: already at %d\n",
132 ddi_driver_name(dip), ddi_get_instance(dip), level);
133 ret = DDI_SUCCESS;
134 goto pcie_pwr_done;
135 }
136
137 if (level < pwr_p->pwr_func_lvl) {
138 /*
139 * Going to lower power. Reject this if we are either busy
140 * or there is a hold.
141 */
142 if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
143 PCIE_DBG("%s(%d): pcie_power: rejecting change to %d "
144 "as busy\n", ddi_driver_name(dip),
145 ddi_get_instance(dip), level);
146 goto pcie_pwr_done;
147 }
148
149 /*
150 * Now we know that we are neither busy nor there is a hold.
151 * At this point none of the children should be at full power.
152 * Reject the request if level reqested is lower than the level
153 * possible.
154 */
155 ASSERT(!counters[PCIE_D0_INDEX] &&
156 !counters[PCIE_UNKNOWN_INDEX]);
157 if (level < pwr_level_allowed(pwr_p)) {
158 PCIE_DBG("%s(%d): pcie_power: rejecting level %d as"
159 " %d is the lowest possible\n",
160 ddi_driver_name(dip), ddi_get_instance(dip), level,
161 pwr_level_allowed(pwr_p));
162 goto pcie_pwr_done;
163 }
164 }
165
166 if (pcie_pwr_change(dip, pwr_p, level) != DDI_SUCCESS) {
167 PCIE_DBG("%s(%d): pcie_power: attempt to change to %d "
168 " failed \n", ddi_driver_name(dip), ddi_get_instance(dip),
169 level);
170 goto pcie_pwr_done;
171 }
172 pwr_p->pwr_func_lvl = level;
173 PCIE_DBG("%s(%d): pcie_power: level changed to %d \n",
174 ddi_driver_name(dip), ddi_get_instance(dip), level);
175 ret = DDI_SUCCESS;
176
177 pcie_pwr_done:
178 mutex_exit(&pwr_p->pwr_lock);
179 return (ret);
180 }
181
182 /*
183 * Called by pcie_power() only. Caller holds the pwr_lock.
184 *
185 * dip - dev_info pointer
186 * pwr_p - pm info for the node.
187 * new - new level
188 */
189 static int
pcie_pwr_change(dev_info_t * dip,pcie_pwr_t * pwr_p,int new)190 pcie_pwr_change(dev_info_t *dip, pcie_pwr_t *pwr_p, int new)
191 {
192 uint16_t pmcsr;
193
194 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
195 ASSERT(new != pwr_p->pwr_func_lvl);
196 pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
197 pmcsr &= ~PCI_PMCSR_STATE_MASK;
198 switch (new) {
199 case PM_LEVEL_D0:
200 pmcsr |= PCI_PMCSR_D0;
201 break;
202
203 case PM_LEVEL_D1:
204 pmcsr |= PCI_PMCSR_D1;
205 break;
206
207 case PM_LEVEL_D2:
208 pmcsr |= PCI_PMCSR_D2;
209 break;
210
211 case PM_LEVEL_D3:
212 pmcsr |= PCI_PMCSR_D3HOT;
213 break;
214
215 default:
216 ASSERT(0);
217 break;
218 }
219 /* Save config space, if going to D3 */
220 if (new == PM_LEVEL_D3) {
221 PCIE_DBG("%s(%d): pwr_change: saving config space regs\n",
222 ddi_driver_name(dip), ddi_get_instance(dip));
223 if (pci_save_config_regs(dip) != DDI_SUCCESS) {
224 PCIE_DBG("%s(%d): pcie_pwr_change: failed to save "
225 "config space regs\n", ddi_driver_name(dip),
226 ddi_get_instance(dip));
227 return (DDI_FAILURE);
228 }
229 }
230
231 pci_config_put16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset, pmcsr);
232
233 /*
234 * TBD: Taken from pci_pci driver. Is this required?
235 * No bus transactions should occur without waiting for
236 * settle time specified in PCI PM spec rev 2.1 sec 5.6.1
237 * To make things simple, just use the max time specified for
238 * all state transitions.
239 */
240 delay(drv_usectohz(PCI_CLK_SETTLE_TIME));
241
242 /*
243 * Restore config space if coming out of D3
244 */
245 if (pwr_p->pwr_func_lvl == PM_LEVEL_D3) {
246 PCIE_DBG("%s(%d): pcie_pwr_change: restoring config space\n",
247 ddi_driver_name(dip), ddi_get_instance(dip));
248 if (pci_restore_config_regs(dip) != DDI_SUCCESS) {
249 PCIE_DBG("%s(%d): pcie_pwr_change: failed to restore "
250 "config space regs\n", ddi_driver_name(dip),
251 ddi_get_instance(dip));
252 return (DDI_FAILURE);
253 }
254 }
255 return (DDI_SUCCESS);
256 }
257
258 /*
259 * bus_ctlops.bus_power function.
260 *
261 * This function handles PRE_ POST_ change notifications, sent by
262 * PM framework related to child's power level change. It marks itself
263 * idle or busy based on the children's power level.
264 */
265 int
pcie_bus_power(dev_info_t * dip,void * impl_arg,pm_bus_power_op_t op,void * arg,void * result)266 pcie_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
267 void *arg, void *result)
268 {
269 pcie_pwr_t *pwr_p = PCIE_NEXUS_PMINFO(dip);
270 int *counters = pwr_p->pwr_counters; /* nexus counters */
271 int *child_counters; /* per child dip counters */
272 pm_bp_child_pwrchg_t *bpc;
273 pm_bp_has_changed_t *bphc;
274 dev_info_t *cdip;
275 int new_level;
276 int old_level;
277 int rv = DDI_SUCCESS;
278 int level_allowed, comp;
279
280 #if defined(__x86)
281 if (dip)
282 return (DDI_SUCCESS);
283 #endif /* defined(__x86) */
284
285 switch (op) {
286 case BUS_POWER_PRE_NOTIFICATION:
287 case BUS_POWER_POST_NOTIFICATION:
288 bpc = (pm_bp_child_pwrchg_t *)arg;
289 cdip = bpc->bpc_dip;
290 new_level = bpc->bpc_nlevel;
291 old_level = bpc->bpc_olevel;
292 comp = bpc->bpc_comp;
293 break;
294
295 case BUS_POWER_HAS_CHANGED:
296 bphc = (pm_bp_has_changed_t *)arg;
297 cdip = bphc->bphc_dip;
298 new_level = bphc->bphc_nlevel;
299 old_level = bphc->bphc_olevel;
300 comp = bphc->bphc_comp;
301 break;
302
303 default:
304 break;
305
306 }
307
308 ASSERT(pwr_p);
309 mutex_enter(&pwr_p->pwr_lock);
310 switch (op) {
311 case BUS_POWER_PRE_NOTIFICATION:
312 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
313 ddi_driver_name(dip), ddi_get_instance(dip),
314 ddi_driver_name(cdip), ddi_get_instance(cdip),
315 pcie_decode_pwr_op(op), old_level, new_level);
316 /*
317 * If the nexus doesn't want the child to go into
318 * non-D0 state, mark the child busy. This way PM
319 * framework will never try to lower the child's power.
320 * In case of pm_lower_power, marking busy won't help.
321 * So we need to specifically reject the attempt to
322 * go to non-D0 state.
323 */
324 if (pwr_p->pwr_flags & PCIE_NO_CHILD_PM) {
325 if (!PCIE_IS_COMPS_COUNTED(cdip)) {
326 PCIE_DBG("%s(%d): pcie_bus_power: marking "
327 "child busy to disable pm \n",
328 ddi_driver_name(dip),
329 ddi_get_instance(dip));
330 (void) pm_busy_component(cdip, 0);
331 }
332 if (new_level < PM_LEVEL_D0 && !comp) {
333 PCIE_DBG("%s(%d): pcie_bus_power: rejecting "
334 "child's attempt to go to %d\n",
335 ddi_driver_name(dip), ddi_get_instance(dip),
336 new_level);
337 rv = DDI_FAILURE;
338 }
339 }
340 mutex_exit(&pwr_p->pwr_lock);
341 if (rv == DDI_SUCCESS)
342 rv = pcie_pm_hold(dip);
343 return (rv);
344
345 case BUS_POWER_HAS_CHANGED:
346 case BUS_POWER_POST_NOTIFICATION:
347 PCIE_DBG("%s(%d): pcie_bus_power: %s@%d op %s %d->%d\n",
348 ddi_driver_name(dip), ddi_get_instance(dip),
349 ddi_driver_name(cdip), ddi_get_instance(cdip),
350 pcie_decode_pwr_op(op), old_level, new_level);
351 /*
352 * Child device power changed
353 * If pm components of this child aren't accounted for
354 * then add the components to the counters. This can't
355 * be done in POST_ATTACH ctlop as pm info isn't created
356 * by then. Also because a driver can make a pm call during
357 * the attach.
358 */
359 if (!PCIE_IS_COMPS_COUNTED(cdip)) {
360 (void) pcie_pm_add_child(dip, cdip);
361 if ((pwr_p->pwr_flags & PCIE_NO_CHILD_PM) &&
362 (op == BUS_POWER_HAS_CHANGED)) {
363 PCIE_DBG("%s(%d): pcie_bus_power: marking "
364 "child busy to disable pm \n",
365 ddi_driver_name(dip),
366 ddi_get_instance(dip));
367 (void) pm_busy_component(cdip, 0);
368 /*
369 * If the driver has already changed to lower
370 * power(pm_power_has_changed) on its own,
371 * there is nothing we can do other than
372 * logging the warning message on the console.
373 */
374 if (new_level < PM_LEVEL_D0)
375 cmn_err(CE_WARN, "!Downstream device "
376 "%s@%d went to non-D0 state: "
377 "possible loss of link\n",
378 ddi_driver_name(cdip),
379 ddi_get_instance(cdip));
380 }
381 }
382
383
384 /*
385 * If it is POST and device PM is supported, release the
386 * hold done in PRE.
387 */
388 if (op == BUS_POWER_POST_NOTIFICATION &&
389 PCIE_SUPPORTS_DEVICE_PM(dip)) {
390 pcie_pm_subrelease(dip, pwr_p);
391 }
392
393 if (*((int *)result) == DDI_FAILURE) {
394 PCIE_DBG("%s(%d): pcie_bus_power: change for %s%d "
395 "failed\n", ddi_driver_name(dip),
396 ddi_get_instance(dip), ddi_driver_name(cdip),
397 ddi_get_instance(cdip));
398 break;
399 }
400 /* Modify counters appropriately */
401 pwr_update_counters(counters, old_level, new_level);
402
403 child_counters = PCIE_CHILD_COUNTERS(cdip);
404 pwr_update_counters(child_counters, old_level, new_level);
405
406 /* If no device PM, return */
407 if (!PCIE_SUPPORTS_DEVICE_PM(dip))
408 break;
409
410 level_allowed = pwr_level_allowed(pwr_p);
411 /*
412 * Check conditions for marking busy
413 * Check the flag to set this busy only once for multiple
414 * busy conditions. Mark busy if our current lowest possible
415 * is equal or greater to the current level.
416 */
417 if (level_allowed >= pwr_p->pwr_func_lvl &&
418 !(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
419 PCIE_DBG("%s(%d): pcie_bus_power: marking busy\n",
420 ddi_driver_name(dip), ddi_get_instance(dip));
421 (void) pm_busy_component(dip, 0);
422 pwr_p->pwr_flags |= PCIE_PM_BUSY;
423 break;
424 }
425 /*
426 * Check conditions for marking idle.
427 * If our lowest possible level is less than our current
428 * level mark idle. Mark idle only if it is not already done.
429 */
430 if ((level_allowed < pwr_p->pwr_func_lvl) &&
431 (pwr_p->pwr_hold == 0) &&
432 (pwr_p->pwr_flags & PCIE_PM_BUSY)) {
433 /*
434 * For pci express, we should check here whether
435 * the link is in L1 state or not.
436 */
437 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
438 ddi_driver_name(dip), ddi_get_instance(dip));
439 (void) pm_idle_component(dip, 0);
440 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
441 break;
442 }
443 break;
444
445 default:
446 mutex_exit(&pwr_p->pwr_lock);
447 return (pm_busop_bus_power(dip, impl_arg, op, arg, result));
448 }
449 mutex_exit(&pwr_p->pwr_lock);
450 return (rv);
451 }
452
453 /*
454 * Decrement the count of children at olevel by one and increment
455 * count of children at nlevel by one.
456 */
457 static void
pwr_update_counters(int * countersp,int olevel,int nlevel)458 pwr_update_counters(int *countersp, int olevel, int nlevel)
459 {
460 uint32_t index;
461
462 ASSERT(olevel >= PM_LEVEL_UNKNOWN && olevel <= PM_LEVEL_D0);
463 ASSERT(nlevel >= PM_LEVEL_UNKNOWN && nlevel <= PM_LEVEL_D0);
464
465 index = (olevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : olevel);
466 countersp[index]--;
467 index = (nlevel == PM_LEVEL_UNKNOWN ? PCIE_UNKNOWN_INDEX : nlevel);
468 countersp[index]++;
469 }
470
471 /*
472 * Returns the lowest possible power level allowed for nexus
473 * based on children's power level. Lowest possible level is
474 * equal to the highest level among the children. It also checks
475 * for the supported level
476 * UNKNOWN = D0 > D1 > D2 > D3
477 */
478 static int
pwr_level_allowed(pcie_pwr_t * pwr_p)479 pwr_level_allowed(pcie_pwr_t *pwr_p)
480 {
481 int *counters = pwr_p->pwr_counters;
482 int i, j;
483
484 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
485 /*
486 * Search from UNKNOWN to D2. unknown is same as D0.
487 * find the highest level among the children. If that
488 * level is supported, return that level. If not,
489 * find the next higher supported level and return that
490 * level. For example, if the D1 is the highest among
491 * children and if D1 isn't supported return D0 as the
492 * lowest possible level. We don't need to look at D3
493 * as that is the default lowest level and it is always
494 * supported.
495 */
496 for (i = PCIE_UNKNOWN_INDEX; i > 0; i--) {
497 if (counters[i]) {
498 if (i == PCIE_UNKNOWN_INDEX)
499 return (PM_LEVEL_D0);
500 /*
501 * i is the highest level among children. If this is
502 * supported, return i.
503 */
504 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, i))
505 return (i);
506 /* find the next higher supported level */
507 for (j = i + 1; j <= PCIE_D0_INDEX; j++) {
508 if (PCIE_LEVEL_SUPPORTED(pwr_p->pwr_pmcaps, j))
509 return (j);
510 }
511 }
512 }
513
514 return (PM_LEVEL_D3);
515 }
516
517 /*
518 * Update the counters with number pm components of the child
519 * all components are assumed to be at UNKNOWN level.
520 */
521 static void
pcie_add_comps(dev_info_t * dip,dev_info_t * cdip,pcie_pwr_t * pwr_p)522 pcie_add_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
523 {
524 int comps = PM_NUMCMPTS(cdip);
525 pcie_pm_t *pcie_pm_p;
526 pcie_pwr_child_t *cpwr_p;
527
528 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
529 if (!comps)
530 return;
531
532 PCIE_DBG("%s(%d): pcie_add_comps: unknown level counter incremented "
533 "from %d by %d because of %s@%d\n",
534 ddi_driver_name(dip), ddi_get_instance(dip),
535 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX], comps,
536 ddi_driver_name(cdip), ddi_get_instance(cdip));
537 (pwr_p->pwr_counters)[PCIE_UNKNOWN_INDEX] += comps;
538 /*
539 * Allocate counters per child. This is a part of pcie
540 * pm info. If there is no pcie pm info, allocate it here.
541 * pcie pm info might already be there for pci express nexus
542 * driver e.g. pcieb. For all leaf nodes, it is allocated here.
543 */
544 if ((pcie_pm_p = PCIE_PMINFO(cdip)) == NULL) {
545 pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
546 sizeof (pcie_pm_t), KM_SLEEP);
547 PCIE_SET_PMINFO(cdip, pcie_pm_p);
548 }
549 cpwr_p = (pcie_pwr_child_t *)kmem_zalloc(sizeof (pcie_pwr_child_t),
550 KM_SLEEP);
551 pcie_pm_p->pcie_par_pminfo = cpwr_p;
552 (cpwr_p->pwr_child_counters)[PCIE_UNKNOWN_INDEX] += comps;
553 }
554
555 /*
556 * Remove the pm components of a child from our counters.
557 */
558 static void
pcie_remove_comps(dev_info_t * dip,dev_info_t * cdip,pcie_pwr_t * pwr_p)559 pcie_remove_comps(dev_info_t *dip, dev_info_t *cdip, pcie_pwr_t *pwr_p)
560 {
561 int i;
562 int *child_counters;
563
564 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
565 if (!(PCIE_PMINFO(cdip)) || !PCIE_PAR_PMINFO(cdip)) {
566 if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
567 /*
568 * Driver never made a PM call and we didn't create
569 * any counters for this device. This also means that
570 * hold made at the PRE_ATTACH time, still remains.
571 * Remove the hold now. The correct thing to do is to
572 * stay at full power when a child is at full power
573 * whether a driver is there or not. This will be
574 * implemented in the future.
575 */
576 pcie_pm_subrelease(dip, pwr_p);
577 }
578 return;
579 }
580 PCIE_DBG("%s(%d): pcie_remove_comps:counters decremented because of "
581 "%s@%d\n", ddi_driver_name(dip), ddi_get_instance(dip),
582 ddi_driver_name(cdip), ddi_get_instance(cdip));
583 child_counters = PCIE_CHILD_COUNTERS(cdip);
584 /*
585 * Adjust the nexus counters. No need to adjust per child dip
586 * counters as we are freeing the per child dip info.
587 */
588 for (i = 0; i < PCIE_MAX_PWR_LEVELS; i++) {
589 ASSERT((pwr_p->pwr_counters)[i] >= child_counters[i]);
590 (pwr_p->pwr_counters)[i] -= child_counters[i];
591 }
592 /* remove both parent pm info and pcie pminfo itself */
593 kmem_free(PCIE_PAR_PMINFO(cdip), sizeof (pcie_pwr_child_t));
594 kmem_free(PCIE_PMINFO(cdip), sizeof (pcie_pm_t));
595 PCIE_RESET_PMINFO(cdip);
596 }
597
598 /*
599 * Power management related initialization common to px and pcieb
600 */
601 int
pwr_common_setup(dev_info_t * dip)602 pwr_common_setup(dev_info_t *dip)
603 {
604 pcie_pm_t *pcie_pm_p;
605 pcie_pwr_t *pwr_p;
606 int pminfo_created = 0;
607
608 /* Create pminfo, if it doesn't exist already */
609 if ((pcie_pm_p = PCIE_PMINFO(dip)) == NULL) {
610 pcie_pm_p = (pcie_pm_t *)kmem_zalloc(
611 sizeof (pcie_pm_t), KM_SLEEP);
612 PCIE_SET_PMINFO(dip, pcie_pm_p);
613 pminfo_created = 1;
614 }
615 pwr_p = (pcie_pwr_t *)kmem_zalloc(sizeof (pcie_pwr_t), KM_SLEEP);
616 mutex_init(&pwr_p->pwr_lock, NULL, MUTEX_DRIVER, NULL);
617 /* Initialize the power level and default level support */
618 pwr_p->pwr_func_lvl = PM_LEVEL_UNKNOWN;
619 pwr_p->pwr_pmcaps = PCIE_DEFAULT_LEVEL_SUPPORTED;
620
621 if (pcie_plat_pwr_setup(dip) != DDI_SUCCESS)
622 goto pwr_common_err;
623
624 pcie_pm_p->pcie_pwr_p = pwr_p;
625 return (DDI_SUCCESS);
626
627 pwr_common_err:
628 mutex_destroy(&pwr_p->pwr_lock);
629 kmem_free(pwr_p, sizeof (pcie_pwr_t));
630 if (pminfo_created) {
631 PCIE_RESET_PMINFO(dip);
632 kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
633 }
634 return (DDI_FAILURE);
635
636 }
637
638 /*
639 * Undo whatever is done in pwr_common_setup. Called by px_detach or pxb_detach
640 */
641 void
pwr_common_teardown(dev_info_t * dip)642 pwr_common_teardown(dev_info_t *dip)
643 {
644 pcie_pm_t *pcie_pm_p = PCIE_PMINFO(dip);
645 pcie_pwr_t *pwr_p;
646
647 if (!pcie_pm_p || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
648 return;
649
650 pcie_plat_pwr_teardown(dip);
651 mutex_destroy(&pwr_p->pwr_lock);
652 pcie_pm_p->pcie_pwr_p = NULL;
653 kmem_free(pwr_p, sizeof (pcie_pwr_t));
654 /*
655 * If the parent didn't store have any pm info about
656 * this node, that means parent doesn't need pminfo when it handles
657 * POST_DETACH for this node. For example, if dip is the dip of
658 * root complex, then there is no parent pm info.
659 */
660 if (!PCIE_PAR_PMINFO(dip)) {
661 kmem_free(pcie_pm_p, sizeof (pcie_pm_t));
662 PCIE_RESET_PMINFO(dip);
663 }
664 }
665
666 /*
667 * Raises the power and marks itself busy.
668 */
669 int
pcie_pm_hold(dev_info_t * dip)670 pcie_pm_hold(dev_info_t *dip)
671 {
672 pcie_pwr_t *pwr_p;
673
674 /* If no PM info or no device PM, return */
675 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
676 !(PCIE_SUPPORTS_DEVICE_PM(dip)))
677 return (DDI_SUCCESS);
678
679 /*
680 * If we are not at full power, then powerup.
681 * Need to be at full power so that link can be
682 * at L0. Similarly for PCI/PCI-X bus, it should be
683 * at full power.
684 */
685 mutex_enter(&pwr_p->pwr_lock);
686 ASSERT(pwr_p->pwr_hold >= 0);
687 PCIE_DBG("%s(%d): pm_hold: incrementing hold \n",
688 ddi_driver_name(dip), ddi_get_instance(dip));
689 pwr_p->pwr_hold++;
690 /* Mark itself busy, if it is not done already */
691 if (!(pwr_p->pwr_flags & PCIE_PM_BUSY)) {
692 PCIE_DBG("%s(%d): pm_hold: marking busy\n",
693 ddi_driver_name(dip), ddi_get_instance(dip));
694 pwr_p->pwr_flags |= PCIE_PM_BUSY;
695 (void) pm_busy_component(dip, 0);
696 }
697 if (pwr_p->pwr_func_lvl == PM_LEVEL_D0) {
698 mutex_exit(&pwr_p->pwr_lock);
699 return (DDI_SUCCESS);
700 }
701 mutex_exit(&pwr_p->pwr_lock);
702 if (pm_raise_power(dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) {
703 PCIE_DBG("%s(%d): pm_hold: attempt to raise power "
704 "from %d to %d failed\n", ddi_driver_name(dip),
705 ddi_get_instance(dip), pwr_p->pwr_func_lvl,
706 PM_LEVEL_D0);
707 pcie_pm_release(dip);
708 return (DDI_FAILURE);
709 }
710 return (DDI_SUCCESS);
711 }
712
713 /*
714 * Reverse the things done in pcie_pm_hold
715 */
716 void
pcie_pm_release(dev_info_t * dip)717 pcie_pm_release(dev_info_t *dip)
718 {
719 pcie_pwr_t *pwr_p;
720
721 /* If no PM info or no device PM, return */
722 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)) ||
723 !(PCIE_SUPPORTS_DEVICE_PM(dip)))
724 return;
725
726 mutex_enter(&pwr_p->pwr_lock);
727 pcie_pm_subrelease(dip, pwr_p);
728 mutex_exit(&pwr_p->pwr_lock);
729 }
730
731 static void
pcie_pm_subrelease(dev_info_t * dip,pcie_pwr_t * pwr_p)732 pcie_pm_subrelease(dev_info_t *dip, pcie_pwr_t *pwr_p)
733 {
734 int level;
735
736 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
737 ASSERT(pwr_p->pwr_hold > 0);
738 PCIE_DBG("%s(%d): pm_subrelease: decrementing hold \n",
739 ddi_driver_name(dip), ddi_get_instance(dip));
740 pwr_p->pwr_hold--;
741 ASSERT(pwr_p->pwr_hold >= 0);
742 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
743 level = pwr_level_allowed(pwr_p);
744 if (pwr_p->pwr_hold == 0 && level < pwr_p->pwr_func_lvl) {
745 PCIE_DBG("%s(%d): pm_subrelease: marking idle \n",
746 ddi_driver_name(dip), ddi_get_instance(dip));
747 (void) pm_idle_component(dip, 0);
748 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
749 }
750 }
751
752 /*
753 * Called when the child makes the first power management call.
754 * sets up the counters. All the components of the child device are
755 * assumed to be at unknown level. It also releases the power hold
756 * pwr_p - parent's pwr_t
757 * cdip - child's dip
758 */
759 int
pcie_pm_add_child(dev_info_t * dip,dev_info_t * cdip)760 pcie_pm_add_child(dev_info_t *dip, dev_info_t *cdip)
761 {
762 pcie_pwr_t *pwr_p;
763
764 /* If no PM info, return */
765 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
766 return (DDI_SUCCESS);
767
768 ASSERT(MUTEX_HELD(&pwr_p->pwr_lock));
769 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
770 pcie_add_comps(dip, cdip, pwr_p);
771
772 /* If no device power management then return */
773 if (!PCIE_SUPPORTS_DEVICE_PM(dip))
774 return (DDI_SUCCESS);
775
776 /*
777 * We have informed PM that we are busy at PRE_ATTACH time for
778 * this child. Release the hold and but don't clear the busy bit.
779 * If a device never changes power, hold will not be released
780 * and we stay at full power.
781 */
782 ASSERT(pwr_p->pwr_hold > 0);
783 PCIE_DBG("%s(%d): pm_add_child: decrementing hold \n",
784 ddi_driver_name(dip), ddi_get_instance(dip));
785 pwr_p->pwr_hold--;
786 /*
787 * We must have made sure that busy bit
788 * is set when we put the hold
789 */
790 ASSERT(pwr_p->pwr_flags & PCIE_PM_BUSY);
791 return (DDI_SUCCESS);
792 }
793
794 /*
795 * Adjust the counters when a child detaches
796 * Marks itself idle if the idle conditions are met.
797 * Called at POST_DETACH time
798 */
799 int
pcie_pm_remove_child(dev_info_t * dip,dev_info_t * cdip)800 pcie_pm_remove_child(dev_info_t *dip, dev_info_t *cdip)
801 {
802 int *counters;
803 int total;
804 pcie_pwr_t *pwr_p;
805
806 /* If no PM info, return */
807 if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
808 return (DDI_SUCCESS);
809
810 counters = pwr_p->pwr_counters;
811 mutex_enter(&pwr_p->pwr_lock);
812 pcie_remove_comps(dip, cdip, pwr_p);
813 /* If no device power management then return */
814 if (!PCIE_SUPPORTS_DEVICE_PM(dip)) {
815 mutex_exit(&pwr_p->pwr_lock);
816 return (DDI_SUCCESS);
817 }
818 total = (counters[PCIE_D0_INDEX] + counters[PCIE_UNKNOWN_INDEX] +
819 counters[PCIE_D1_INDEX] + counters[PCIE_D2_INDEX] +
820 counters[PCIE_D3_INDEX]);
821 /*
822 * Mark idle if either there are no children or our lowest
823 * possible level is less than the current level. Mark idle
824 * only if it is not already done.
825 */
826 if ((pwr_p->pwr_hold == 0) &&
827 (!total || (pwr_level_allowed(pwr_p) < pwr_p->pwr_func_lvl))) {
828 if (pwr_p->pwr_flags & PCIE_PM_BUSY) {
829 PCIE_DBG("%s(%d): pcie_bus_power: marking idle\n",
830 ddi_driver_name(dip), ddi_get_instance(dip));
831 (void) pm_idle_component(dip, 0);
832 pwr_p->pwr_flags &= ~PCIE_PM_BUSY;
833 }
834 }
835 mutex_exit(&pwr_p->pwr_lock);
836 return (DDI_SUCCESS);
837 }
838
839 boolean_t
pcie_is_pcie(dev_info_t * dip)840 pcie_is_pcie(dev_info_t *dip)
841 {
842 pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
843 ASSERT(bus_p);
844 return (bus_p->bus_pcie_off != 0);
845 }
846
847 /*
848 * Called by px_attach or pcieb_attach:: DDI_RESUME
849 */
850 int
pcie_pwr_resume(dev_info_t * dip)851 pcie_pwr_resume(dev_info_t *dip)
852 {
853 dev_info_t *cdip;
854 pcie_pwr_t *pwr_p = NULL;
855
856 #if defined(__x86)
857 if (dip)
858 return (DDI_SUCCESS);
859 #endif /* defined(__x86) */
860
861 if (PCIE_PMINFO(dip))
862 pwr_p = PCIE_NEXUS_PMINFO(dip);
863
864 if (pwr_p) {
865 /* Inform the PM framework that dip is at full power */
866 if (PCIE_SUPPORTS_DEVICE_PM(dip)) {
867 ASSERT(pwr_p->pwr_func_lvl == PM_LEVEL_D0);
868 (void) pm_raise_power(dip, 0,
869 pwr_p->pwr_func_lvl);
870 }
871 }
872
873 /*
874 * Code taken from pci driver.
875 * Restore config registers for children that did not save
876 * their own registers. Children pwr states are UNKNOWN after
877 * a resume since it is possible for the PM framework to call
878 * resume without an actual power cycle. (ie if suspend fails).
879 */
880 for (cdip = ddi_get_child(dip); cdip != NULL;
881 cdip = ddi_get_next_sibling(cdip)) {
882 boolean_t is_pcie;
883
884 /*
885 * Not interested in children who are not already
886 * init'ed. They will be set up by init_child().
887 */
888 if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
889 PCIE_DBG("%s(%d): "
890 "DDI_RESUME: skipping %s%d not in CF1\n",
891 ddi_driver_name(dip), ddi_get_instance(dip),
892 ddi_driver_name(cdip), ddi_get_instance(cdip));
893 continue;
894 }
895
896 /*
897 * Only restore config registers if saved by nexus.
898 */
899 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
900 "nexus-saved-config-regs") != 1)
901 continue;
902
903 PCIE_DBG("%s(%d): "
904 "DDI_RESUME: nexus restoring %s%d config regs\n",
905 ddi_driver_name(dip), ddi_get_instance(dip),
906 ddi_driver_name(cdip), ddi_get_instance(cdip));
907
908 /* clear errors left by OBP scrubbing */
909 pcie_clear_errors(cdip);
910
911 /* PCIe workaround: disable errors during 4K config resore */
912 is_pcie = pcie_is_pcie(cdip);
913 if (is_pcie)
914 pcie_disable_errors(cdip);
915 (void) pci_restore_config_regs(cdip);
916 if (is_pcie) {
917 pcie_enable_errors(cdip);
918 (void) pcie_enable_ce(cdip);
919 }
920
921 if (ndi_prop_remove(DDI_DEV_T_NONE, cdip,
922 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
923 PCIE_DBG("%s(%d): %s%d can't remove prop %s",
924 ddi_driver_name(dip), ddi_get_instance(dip),
925 ddi_driver_name(cdip), ddi_get_instance(cdip),
926 "nexus-saved-config-regs");
927 }
928 }
929 return (DDI_SUCCESS);
930 }
931
932 /*
933 * Called by pcie_detach or pcieb_detach:: DDI_SUSPEND
934 */
935 int
pcie_pwr_suspend(dev_info_t * dip)936 pcie_pwr_suspend(dev_info_t *dip)
937 {
938 dev_info_t *cdip;
939 int i, *counters; /* per nexus counters */
940 int *child_counters = NULL; /* per child dip counters */
941 pcie_pwr_t *pwr_p = NULL;
942
943 #if defined(__x86)
944 if (dip)
945 return (DDI_SUCCESS);
946 #endif /* defined(__x86) */
947
948 if (PCIE_PMINFO(dip))
949 pwr_p = PCIE_NEXUS_PMINFO(dip);
950
951 /*
952 * Mark all children to be unknown and bring our power level
953 * to full, if required. This is to avoid any panics while
954 * accessing the child's config space.
955 */
956 if (pwr_p) {
957 mutex_enter(&pwr_p->pwr_lock);
958 if (PCIE_SUPPORTS_DEVICE_PM(dip) &&
959 pwr_p->pwr_func_lvl != PM_LEVEL_D0) {
960 mutex_exit(&pwr_p->pwr_lock);
961 if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
962 DDI_SUCCESS) {
963 PCIE_DBG("%s(%d): pwr_suspend: attempt "
964 "to raise power from %d to %d "
965 "failed\n", ddi_driver_name(dip),
966 ddi_get_instance(dip), pwr_p->pwr_func_lvl,
967 PM_LEVEL_D0);
968 return (DDI_FAILURE);
969 }
970 mutex_enter(&pwr_p->pwr_lock);
971 }
972 counters = pwr_p->pwr_counters;
973 /*
974 * Update the nexus counters. At the resume time all
975 * components are considered to be at unknown level. Use the
976 * fact that counters for unknown level are at the end.
977 */
978 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
979 counters[PCIE_UNKNOWN_INDEX] += counters[i];
980 counters[i] = 0;
981 }
982 mutex_exit(&pwr_p->pwr_lock);
983 }
984
985 /*
986 * Code taken from pci driver.
987 * Save the state of the configuration headers of child
988 * nodes.
989 */
990 for (cdip = ddi_get_child(dip); cdip != NULL;
991 cdip = ddi_get_next_sibling(cdip)) {
992 boolean_t is_pcie;
993
994 /*
995 * Not interested in children who are not already
996 * init'ed. They will be set up in init_child().
997 */
998 if (i_ddi_node_state(cdip) < DS_INITIALIZED) {
999 PCIE_DBG("%s(%d): DDI_SUSPEND: skipping "
1000 "%s%d not in CF1\n", ddi_driver_name(dip),
1001 ddi_get_instance(dip), ddi_driver_name(cdip),
1002 ddi_get_instance(cdip));
1003 continue;
1004 }
1005 /*
1006 * Update per child dip counters, if any. Counters
1007 * will not exist if the child is not power manageable
1008 * or if its power entry is never invoked.
1009 */
1010 if (PCIE_PMINFO(cdip) && PCIE_PAR_PMINFO(cdip))
1011 child_counters = PCIE_CHILD_COUNTERS(cdip);
1012 if (child_counters && pwr_p) {
1013 mutex_enter(&pwr_p->pwr_lock);
1014 for (i = 0; i < PCIE_UNKNOWN_INDEX; i++) {
1015 child_counters[PCIE_UNKNOWN_INDEX] +=
1016 child_counters[i];
1017 child_counters[i] = 0;
1018 }
1019 mutex_exit(&pwr_p->pwr_lock);
1020 }
1021
1022 /*
1023 * Only save config registers if not already saved by child.
1024 */
1025 if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1026 SAVED_CONFIG_REGS) == 1) {
1027 continue;
1028 }
1029
1030 /*
1031 * The nexus needs to save config registers. Create a property
1032 * so it knows to restore on resume.
1033 */
1034 if (ndi_prop_create_boolean(DDI_DEV_T_NONE, cdip,
1035 "nexus-saved-config-regs") != DDI_PROP_SUCCESS) {
1036 PCIE_DBG("%s(%d): %s%d can't update prop %s",
1037 ddi_driver_name(dip), ddi_get_instance(dip),
1038 ddi_driver_name(cdip), ddi_get_instance(cdip),
1039 "nexus-saved-config-regs");
1040 }
1041 PCIE_DBG("%s(%d): DDI_SUSPEND: saving config space for"
1042 " %s%d\n", ddi_driver_name(dip), ddi_get_instance(dip),
1043 ddi_driver_name(cdip), ddi_get_instance(cdip));
1044
1045 /* PCIe workaround: disable errors during 4K config save */
1046 is_pcie = pcie_is_pcie(cdip);
1047 if (is_pcie)
1048 pcie_disable_errors(cdip);
1049 (void) pci_save_config_regs(cdip);
1050 if (is_pcie) {
1051 pcie_enable_errors(cdip);
1052 (void) pcie_enable_ce(cdip);
1053 }
1054 }
1055 return (DDI_SUCCESS);
1056 }
1057
1058 #ifdef DEBUG
1059 /*
1060 * Description of bus_power_op.
1061 */
1062 typedef struct pcie_buspwr_desc {
1063 pm_bus_power_op_t pwr_op;
1064 char *pwr_desc;
1065 } pcie_buspwr_desc_t;
1066
1067 static pcie_buspwr_desc_t pcie_buspwr_desc[] = {
1068 {BUS_POWER_CHILD_PWRCHG, "CHILD_PWRCHG"},
1069 {BUS_POWER_NEXUS_PWRUP, "NEXUS_PWRUP"},
1070 {BUS_POWER_PRE_NOTIFICATION, "PRE_NOTIFICATION"},
1071 {BUS_POWER_POST_NOTIFICATION, "POST_NOTIFICATION"},
1072 {BUS_POWER_HAS_CHANGED, "HAS_CHANGED"},
1073 {BUS_POWER_NOINVOL, "NOINVOL"},
1074 {-1, NULL}
1075 };
1076
1077 /*
1078 * Returns description of the bus_power_op.
1079 */
1080 static char *
pcie_decode_pwr_op(pm_bus_power_op_t op)1081 pcie_decode_pwr_op(pm_bus_power_op_t op)
1082 {
1083 pcie_buspwr_desc_t *descp = pcie_buspwr_desc;
1084
1085 for (; descp->pwr_desc; descp++) {
1086 if (op == descp->pwr_op)
1087 return (descp->pwr_desc);
1088 }
1089 return ("UNKNOWN OP");
1090 }
1091 #endif
1092