xref: /titanic_41/usr/src/uts/common/os/sunpm.c (revision c9431fa1e59a88c2f0abf611f25b97af964449e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the voyager platform are known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)). The exception to this rule is that power
65  * manageable CPU devices may be automatically managed independently of autopm
66  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
67  * ioctls) cpupm. If the CPU devices are not managed independently, then they
68  * are managed by autopm. In either case, for automatically power managed
69  * devices, all components are considered independent of each other, and it is
70  * up to the driver to decide when a transition requires saving or restoring
71  * hardware state.
72  *
73  * Each device component also has a threshold time associated with each power
74  * transition (see power.conf(4)), and a busy/idle state maintained by the
75  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
76  * Components are created idle.
77  *
78  * The PM framework provides several functions:
79  * -implement PM policy as described in power.conf(4)
80  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
81  *  Policies consist of:
82  *    -set threshold values (defaults if none provided by pmconfig)
83  *    -set dependencies among devices
84  *    -enable/disable autopm
85  *    -enable/disable cpupm
86  *    -turn down idle components based on thresholds (if autopm or cpupm is
87  *     enabled) (aka scanning)
88  *    -maintain power states based on dependencies among devices
89  *    -upon request, or when the frame buffer powers off, attempt to turn off
90  *     all components that are idle or become idle over the next (10 sec)
91  *     period in an attempt to get down to an EnergyStar compliant state
92  *    -prevent powering off of a device which exported the
93  *     pm-no-involuntary-power-cycles property without active involvement of
94  *     the device's driver (so no removing power when the device driver is
95  *     not attached)
96  * -provide a mechanism for a device driver to request that a device's component
97  *  be brought back to the power level necessary for the use of the device
98  * -allow a process to directly control the power levels of device components
99  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
100  * -ensure that the console frame buffer is powered up before being referenced
101  *  via prom_printf() or other prom calls that might generate console output
102  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
103  * -provide "backwards compatible" behavior for devices without pm-components
104  *  property
105  *
106  * Scanning:
107  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
108  * component of each managed device to its lowest power based on the threshold
109  * of idleness associated with each transition and the busy/idle state of the
110  * component.
111  *
112  * The actual work of this is done by pm_scan_dev(), which cycles through each
113  * component of a device, checking its idleness against its current threshold,
114  * and calling pm_set_power() as appropriate to change the power level.
115  * This function also indicates when it would next be profitable to scan the
116  * device again, and a new scan is scheduled after that time.
117  *
118  * Dependencies:
119  * It is possible to establish a dependency between the power states of two
120  * otherwise unrelated devices.  This is currently done to ensure that the
121  * cdrom is always up whenever the console framebuffer is up, so that the user
122  * can insert a cdrom and see a popup as a result.
123  *
124  * The dependency terminology used in power.conf(4) is not easy to understand,
125  * so we've adopted a different terminology in the implementation.  We write
126  * of a "keeps up" and a "kept up" device.  A relationship can be established
127  * where one device keeps up another.  That means that if the keepsup device
128  * has any component that is at a non-zero power level, all components of the
129  * "kept up" device must be brought to full power.  This relationship is
130  * asynchronous.  When the keeping device is powered up, a request is queued
131  * to a worker thread to bring up the kept device.  The caller does not wait.
132  * Scan will not turn down a kept up device.
133  *
134  * Direct PM:
135  * A device may be directly power managed by a process.  If a device is
136  * directly pm'd, then it will not be scanned, and dependencies will not be
137  * enforced.  If a directly pm'd device's driver requests a power change (via
138  * pm_raise_power(9F)), then the request is blocked and notification is sent
139  * to the controlling process, which must issue the requested power change for
140  * the driver to proceed.
141  *
142  */
143 
144 #include <sys/types.h>
145 #include <sys/errno.h>
146 #include <sys/callb.h>		/* callback registration during CPR */
147 #include <sys/conf.h>		/* driver flags and functions */
148 #include <sys/open.h>		/* OTYP_CHR definition */
149 #include <sys/stat.h>		/* S_IFCHR definition */
150 #include <sys/pathname.h>	/* name -> dev_info xlation */
151 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
152 #include <sys/kmem.h>		/* memory alloc stuff */
153 #include <sys/debug.h>
154 #include <sys/archsystm.h>
155 #include <sys/pm.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/sunpm.h>
160 #include <sys/epm.h>
161 #include <sys/vfs.h>
162 #include <sys/mode.h>
163 #include <sys/mkdev.h>
164 #include <sys/promif.h>
165 #include <sys/consdev.h>
166 #include <sys/esunddi.h>
167 #include <sys/modctl.h>
168 #include <sys/fs/ufs_fs.h>
169 #include <sys/note.h>
170 #include <sys/taskq.h>
171 #include <sys/bootconf.h>
172 #include <sys/reboot.h>
173 #include <sys/spl.h>
174 #include <sys/disp.h>
175 #include <sys/sobject.h>
176 #include <sys/sunmdi.h>
177 
178 
179 /*
180  * PM LOCKING
181  *	The list of locks:
182  * Global pm mutex locks.
183  *
184  * pm_scan_lock:
185  *		It protects the timeout id of the scan thread, and the value
186  *		of autopm_enabled and cpupm.  This lock is not held
187  *		concurrently with any other PM locks.
188  *
189  * pm_clone_lock:	Protects the clone list and count of poll events
190  *		pending for the pm driver.
191  *		Lock ordering:
192  *			pm_clone_lock -> pm_pscc_interest_rwlock,
193  *			pm_clone_lock -> pm_pscc_direct_rwlock.
194  *
195  * pm_rsvp_lock:
196  *		Used to synchronize the data structures used for processes
197  *		to rendezvous with state change information when doing
198  *		direct PM.
199  *		Lock ordering:
200  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
201  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
202  *			pm_rsvp_lock -> pm_clone_lock.
203  *
204  * ppm_lock:	protects the list of registered ppm drivers
205  *		Lock ordering:
206  *			ppm_lock -> ppm driver unit_lock
207  *
208  * pm_compcnt_lock:
209  *		Protects count of components that are not at their lowest
210  *		power level.
211  *		Lock ordering:
212  *			pm_compcnt_lock -> ppm_lock.
213  *
214  * pm_dep_thread_lock:
215  *		Protects work list for pm_dep_thread.  Not taken concurrently
216  *		with any other pm lock.
217  *
218  * pm_remdrv_lock:
219  *		Serializes the operation of removing noinvol data structure
220  *		entries for a branch of the tree when a driver has been
221  *		removed from the system (modctl_rem_major).
222  *		Lock ordering:
223  *			pm_remdrv_lock -> pm_noinvol_rwlock.
224  *
225  * pm_cfb_lock: (High level spin lock)
226  *		Protects the count of how many components of the console
227  *		frame buffer are off (so we know if we have to bring up the
228  *		console as a result of a prom_printf, etc.)
229  *		No other locks are taken while holding this lock.
230  *
231  * pm_loan_lock:
232  *		Protects the lock_loan list.  List is used to record that one
233  *		thread has acquired a power lock but has launched another thread
234  *		to complete its processing.  An entry in the list indicates that
235  *		the worker thread can borrow the lock held by the other thread,
236  *		which must block on the completion of the worker.  Use is
237  *		specific to module loading.
238  *		No other locks are taken while holding this lock.
239  *
240  * Global PM rwlocks
241  *
242  * pm_thresh_rwlock:
243  *		Protects the list of thresholds recorded for future use (when
244  *		devices attach).
245  *		Lock ordering:
246  *			pm_thresh_rwlock -> devi_pm_lock
247  *
248  * pm_noinvol_rwlock:
249  *		Protects list of detached nodes that had noinvol registered.
250  *		No other PM locks are taken while holding pm_noinvol_rwlock.
251  *
252  * pm_pscc_direct_rwlock:
253  *		Protects the list that maps devices being directly power
254  *		managed to the processes that manage them.
255  *		Lock ordering:
256  *			pm_pscc_direct_rwlock -> psce_lock
257  *
258  * pm_pscc_interest_rwlock:
259  *		Protects the list that maps state change events to processes
260  *		that want to know about them.
261  *		Lock ordering:
262  *			pm_pscc_interest_rwlock -> psce_lock
263  *
264  * per-dip locks:
265  *
266  * Each node has these per-dip locks, which are only used if the device is
267  * a candidate for power management (e.g. has pm components)
268  *
269  * devi_pm_lock:
270  *		Protects all power management state of the node except for
271  *		power level, which is protected by ndi_devi_enter().
272  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
273  *		Lock ordering:
274  *			devi_pm_lock -> pm_rsvp_lock,
275  *			devi_pm_lock -> pm_dep_thread_lock,
276  *			devi_pm_lock -> pm_noinvol_rwlock,
277  *			devi_pm_lock -> power lock
278  *
279  * power lock (ndi_devi_enter()):
280  *		Since changing power level is possibly a slow operation (30
281  *		seconds to spin up a disk drive), this is locked separately.
282  *		Since a call into the driver to change the power level of one
283  *		component may result in a call back into the framework to change
284  *		the power level of another, this lock allows re-entrancy by
285  *		the same thread (ndi_devi_enter is used for this because
286  *		the USB framework uses ndi_devi_enter in its power entry point,
287  *		and use of any other lock would produce a deadlock).
288  *
289  * devi_pm_busy_lock:
290  *		This lock protects the integrity of the busy count.  It is
291  *		only taken by pm_busy_component() and pm_idle_component and
292  *		some code that adjusts the busy time after the timer gets set
293  *		up or after a CPR operation.  It is per-dip to keep from
294  *		single-threading all the disk drivers on a system.
295  *		It could be per component instead, but most devices have
296  *		only one component.
297  *		No other PM locks are taken while holding this lock.
298  *
299  */
300 
301 static int stdout_is_framebuffer;
302 static kmutex_t	e_pm_power_lock;
303 static kmutex_t pm_loan_lock;
304 kmutex_t	pm_scan_lock;
305 callb_id_t	pm_cpr_cb_id;
306 callb_id_t	pm_panic_cb_id;
307 callb_id_t	pm_halt_cb_id;
308 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
309 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
310 
311 clock_t pm_min_scan = PM_MIN_SCAN;
312 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
313 
314 static int pm_busop_set_power(dev_info_t *,
315     void *, pm_bus_power_op_t, void *, void *);
316 static int pm_busop_match_request(dev_info_t *, void *);
317 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
318 
319 /*
320  * Dependency Processing is done thru a seperate thread.
321  */
322 kmutex_t	pm_dep_thread_lock;
323 kcondvar_t	pm_dep_thread_cv;
324 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
325 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
326 
327 /*
328  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
329  * power managing things in single user mode that have been suppressed via
330  * power.conf entries.  Protected by pm_scan_lock.
331  */
332 int		autopm_enabled;
333 
334 /*
335  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
336  * to define the power management behavior of CPU devices separate from
337  * autopm. Protected by pm_scan_lock.
338  */
339 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
340 
341 /*
342  * This flag is true while processes are stopped for a checkpoint/resume.
343  * Controlling processes of direct pm'd devices are not available to
344  * participate in power level changes, so we bypass them when this is set.
345  */
346 static int	pm_processes_stopped;
347 
348 #ifdef	DEBUG
349 
350 /*
351  * see common/sys/epm.h for PMD_* values
352  */
353 uint_t		pm_debug = 0;
354 
355 /*
356  * If pm_divertdebug is set, then no prom_printf calls will be made by
357  * PMD(), which will prevent debug output from bringing up the console
358  * frame buffer.  Clearing this variable before setting pm_debug will result
359  * in PMD output going to the console.
360  *
361  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
362  * deadlocks and decremented at the end of pm_set_power()
363  */
364 uint_t		pm_divertdebug = 1;
365 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
366 
367 void prdeps(char *);
368 #endif
369 
370 /* Globals */
371 
372 /*
373  * List of recorded thresholds and dependencies
374  */
375 pm_thresh_rec_t *pm_thresh_head;
376 krwlock_t pm_thresh_rwlock;
377 
378 pm_pdr_t *pm_dep_head;
379 static int pm_unresolved_deps = 0;
380 static int pm_prop_deps = 0;
381 
382 /*
383  * List of devices that exported no-involuntary-power-cycles property
384  */
385 pm_noinvol_t *pm_noinvol_head;
386 
387 /*
388  * Locks used in noinvol processing
389  */
390 krwlock_t pm_noinvol_rwlock;
391 kmutex_t pm_remdrv_lock;
392 
393 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
394 int pm_system_idle_threshold;
395 int pm_cpu_idle_threshold;
396 
397 /*
398  * By default nexus has 0 threshold, and depends on its children to keep it up
399  */
400 int pm_default_nexus_threshold = 0;
401 
402 /*
403  * Data structures shared with common/io/pm.c
404  */
405 kmutex_t	pm_clone_lock;
406 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
407 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
408 unsigned char	pm_interest[PM_MAX_CLONE];
409 struct pollhead	pm_pollhead;
410 
411 extern int	hz;
412 extern char	*platform_module_list[];
413 
414 /*
415  * Wrappers for use in ddi_walk_devs
416  */
417 
418 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
419 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
420 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
421 static int		pm_discard_dep_walk(dev_info_t *, void *);
422 #ifdef DEBUG
423 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
424 #endif
425 
426 /*
427  * Routines for managing noinvol devices
428  */
429 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
430 void			pm_noinvol_update_node(dev_info_t *,
431 			    pm_bp_noinvol_t *req);
432 
433 kmutex_t pm_rsvp_lock;
434 kmutex_t pm_compcnt_lock;
435 krwlock_t pm_pscc_direct_rwlock;
436 krwlock_t pm_pscc_interest_rwlock;
437 
438 #define	PSC_INTEREST	0	/* belongs to interest psc list */
439 #define	PSC_DIRECT	1	/* belongs to direct psc list */
440 
441 pscc_t *pm_pscc_interest;
442 pscc_t *pm_pscc_direct;
443 
444 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
445 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
446 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
447 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
448 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
449 
450 #define	PM_INCR_NOTLOWEST(dip) {					\
451 	mutex_enter(&pm_compcnt_lock);					\
452 	if (!PM_IS_NEXUS(dip) ||					\
453 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
454 		if (pm_comps_notlowest == 0)				\
455 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
456 		pm_comps_notlowest++;					\
457 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
458 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
459 	}								\
460 	mutex_exit(&pm_compcnt_lock);					\
461 }
462 #define	PM_DECR_NOTLOWEST(dip) {					\
463 	mutex_enter(&pm_compcnt_lock);					\
464 	if (!PM_IS_NEXUS(dip) ||					\
465 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
466 		ASSERT(pm_comps_notlowest);				\
467 		pm_comps_notlowest--;					\
468 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
469 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
470 		if (pm_comps_notlowest == 0)				\
471 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
472 	}								\
473 	mutex_exit(&pm_compcnt_lock);					\
474 }
475 
476 /*
477  * console frame-buffer power-management is not enabled when
478  * debugging services are present.  to override, set pm_cfb_override
479  * to non-zero.
480  */
481 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
482 kmutex_t pm_cfb_lock;
483 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
484 #ifdef DEBUG
485 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
486 #else
487 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
488 #endif
489 
490 static dev_info_t *cfb_dip = 0;
491 static dev_info_t *cfb_dip_detaching = 0;
492 uint_t cfb_inuse = 0;
493 static ddi_softintr_t pm_soft_id;
494 static clock_t pm_soft_pending;
495 int	pm_scans_disabled = 0;
496 
497 /*
498  * A structure to record the fact that one thread has borrowed a lock held
499  * by another thread.  The context requires that the lender block on the
500  * completion of the borrower.
501  */
502 typedef struct lock_loan {
503 	struct lock_loan	*pmlk_next;
504 	kthread_t		*pmlk_borrower;
505 	kthread_t		*pmlk_lender;
506 	dev_info_t		*pmlk_dip;
507 } lock_loan_t;
508 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
509 
510 #ifdef	DEBUG
511 #ifdef PMDDEBUG
512 #define	PMD_FUNC(func, name)	char *(func) = (name);
513 #else
514 #define	PMD_FUNC(func, name)
515 #endif
516 #else
517 #define	PMD_FUNC(func, name)
518 #endif
519 
520 
521 /*
522  * Must be called before first device (including pseudo) attach
523  */
524 void
525 pm_init_locks(void)
526 {
527 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
528 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
529 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
530 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
531 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
532 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
533 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
534 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
535 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
536 }
537 
538 static boolean_t
539 pm_cpr_callb(void *arg, int code)
540 {
541 	_NOTE(ARGUNUSED(arg))
542 	static int auto_save;
543 	static pm_cpupm_t cpupm_save;
544 	static int pm_reset_timestamps(dev_info_t *, void *);
545 
546 	switch (code) {
547 	case CB_CODE_CPR_CHKPT:
548 		/*
549 		 * Cancel scan or wait for scan in progress to finish
550 		 * Other threads may be trying to restart the scan, so we
551 		 * have to keep at it unil it sticks
552 		 */
553 		mutex_enter(&pm_scan_lock);
554 		ASSERT(!pm_scans_disabled);
555 		pm_scans_disabled = 1;
556 		auto_save = autopm_enabled;
557 		autopm_enabled = 0;
558 		cpupm_save = cpupm;
559 		cpupm = PM_CPUPM_NOTSET;
560 		mutex_exit(&pm_scan_lock);
561 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
562 		break;
563 
564 	case CB_CODE_CPR_RESUME:
565 		ASSERT(!autopm_enabled);
566 		ASSERT(cpupm == PM_CPUPM_NOTSET);
567 		ASSERT(pm_scans_disabled);
568 		pm_scans_disabled = 0;
569 		/*
570 		 * Call pm_reset_timestamps to reset timestamps of each
571 		 * device to the time when the system is resumed so that their
572 		 * idleness can be re-calculated. That's to avoid devices from
573 		 * being powered down right after resume if the system was in
574 		 * suspended mode long enough.
575 		 */
576 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
577 
578 		autopm_enabled = auto_save;
579 		cpupm = cpupm_save;
580 		/*
581 		 * If there is any auto-pm device, get the scanning
582 		 * going. Otherwise don't bother.
583 		 */
584 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
585 		break;
586 	}
587 	return (B_TRUE);
588 }
589 
590 /*
591  * This callback routine is called when there is a system panic.  This function
592  * exists for prototype matching.
593  */
594 static boolean_t
595 pm_panic_callb(void *arg, int code)
596 {
597 	_NOTE(ARGUNUSED(arg, code))
598 	void pm_cfb_check_and_powerup(void);
599 	PMD(PMD_CFB, ("pm_panic_callb\n"))
600 	pm_cfb_check_and_powerup();
601 	return (B_TRUE);
602 }
603 
604 static boolean_t
605 pm_halt_callb(void *arg, int code)
606 {
607 	_NOTE(ARGUNUSED(arg, code))
608 	return (B_TRUE);	/* XXX for now */
609 }
610 
611 /*
612  * This needs to be called after the root and platform drivers are loaded
613  * and be single-threaded with respect to driver attach/detach
614  */
615 void
616 pm_init(void)
617 {
618 	PMD_FUNC(pmf, "pm_init")
619 	char **mod;
620 	extern pri_t minclsyspri;
621 	static void pm_dep_thread(void);
622 
623 	pm_comps_notlowest = 0;
624 	pm_system_idle_threshold = pm_default_idle_threshold;
625 	pm_cpu_idle_threshold = 0;
626 
627 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
628 	    CB_CL_CPR_PM, "pm_cpr");
629 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
630 		    CB_CL_PANIC, "pm_panic");
631 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
632 		    CB_CL_HALT, "pm_halt");
633 
634 	/*
635 	 * Create a thread to do dependency processing.
636 	 */
637 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
638 	    TS_RUN, minclsyspri);
639 
640 	/*
641 	 * loadrootmodules already loaded these ppm drivers, now get them
642 	 * attached so they can claim the root drivers as they attach
643 	 */
644 	for (mod = platform_module_list; *mod; mod++) {
645 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
646 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
647 			    *mod);
648 		} else {
649 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
650 			    ddi_major_to_name(ddi_name_to_major(*mod))))
651 		}
652 	}
653 }
654 
655 /*
656  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
657  * enabled) when device becomes power managed or after a failed detach and
658  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
659  * a CPR resume to get all the devices scanning again.
660  */
661 void
662 pm_scan_init(dev_info_t *dip)
663 {
664 	PMD_FUNC(pmf, "scan_init")
665 	pm_scan_t	*scanp;
666 
667 	ASSERT(!PM_ISBC(dip));
668 
669 	PM_LOCK_DIP(dip);
670 	scanp = PM_GET_PM_SCAN(dip);
671 	if (!scanp) {
672 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
673 		    pmf, PM_DEVICE(dip)))
674 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
675 		DEVI(dip)->devi_pm_scan = scanp;
676 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
677 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
678 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
679 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
680 	}
681 	PM_UNLOCK_DIP(dip);
682 }
683 
684 /*
685  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
686  */
687 void
688 pm_scan_fini(dev_info_t *dip)
689 {
690 	PMD_FUNC(pmf, "scan_fini")
691 	pm_scan_t	*scanp;
692 
693 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
694 	ASSERT(!PM_ISBC(dip));
695 	PM_LOCK_DIP(dip);
696 	scanp = PM_GET_PM_SCAN(dip);
697 	if (!scanp) {
698 		PM_UNLOCK_DIP(dip);
699 		return;
700 	}
701 
702 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
703 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
704 
705 	kmem_free(scanp, sizeof (pm_scan_t));
706 	DEVI(dip)->devi_pm_scan = NULL;
707 	PM_UNLOCK_DIP(dip);
708 }
709 
710 /*
711  * Given a pointer to a component struct, return the current power level
712  * (struct contains index unless it is a continuous level).
713  * Located here in hopes of getting both this and dev_is_needed into the
714  * cache together
715  */
716 static int
717 cur_power(pm_component_t *cp)
718 {
719 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
720 		return (cp->pmc_cur_pwr);
721 
722 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
723 }
724 
725 static char *
726 pm_decode_direction(int direction)
727 {
728 	switch (direction) {
729 	case PM_LEVEL_UPONLY:
730 		return ("up");
731 
732 	case PM_LEVEL_EXACT:
733 		return ("exact");
734 
735 	case PM_LEVEL_DOWNONLY:
736 		return ("down");
737 
738 	default:
739 		return ("INVALID DIRECTION");
740 	}
741 }
742 
743 char *
744 pm_decode_op(pm_bus_power_op_t op)
745 {
746 	switch (op) {
747 	case BUS_POWER_CHILD_PWRCHG:
748 		return ("CHILD_PWRCHG");
749 	case BUS_POWER_NEXUS_PWRUP:
750 		return ("NEXUS_PWRUP");
751 	case BUS_POWER_PRE_NOTIFICATION:
752 		return ("PRE_NOTIFICATION");
753 	case BUS_POWER_POST_NOTIFICATION:
754 		return ("POST_NOTIFICATION");
755 	case BUS_POWER_HAS_CHANGED:
756 		return ("HAS_CHANGED");
757 	case BUS_POWER_NOINVOL:
758 		return ("NOINVOL");
759 	default:
760 		return ("UNKNOWN OP");
761 	}
762 }
763 
764 /*
765  * Returns true if level is a possible (valid) power level for component
766  */
767 int
768 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
769 {
770 	PMD_FUNC(pmf, "e_pm_valid_power")
771 	pm_component_t *cp = PM_CP(dip, cmpt);
772 	int i;
773 	int *ip = cp->pmc_comp.pmc_lvals;
774 	int limit = cp->pmc_comp.pmc_numlevels;
775 
776 	if (level < 0)
777 		return (0);
778 	for (i = 0; i < limit; i++) {
779 		if (level == *ip++)
780 			return (1);
781 	}
782 #ifdef DEBUG
783 	if (pm_debug & PMD_FAIL) {
784 		ip = cp->pmc_comp.pmc_lvals;
785 
786 		for (i = 0; i < limit; i++)
787 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
788 			    pmf, i, *ip++))
789 	}
790 #endif
791 	return (0);
792 }
793 
794 /*
795  * Returns true if device is pm'd (after calling pm_start if need be)
796  */
797 int
798 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
799 {
800 	pm_info_t *info;
801 	static int pm_start(dev_info_t *dip);
802 
803 	/*
804 	 * Check if the device is power managed if not.
805 	 * To make the common case (device is power managed already)
806 	 * fast, we check without the lock.  If device is not already
807 	 * power managed, then we take the lock and the long route through
808 	 * go get it managed.  Devices never go unmanaged until they
809 	 * detach.
810 	 */
811 	info = PM_GET_PM_INFO(dip);
812 	if (!info) {
813 		if (!DEVI_IS_ATTACHING(dip)) {
814 			return (0);
815 		}
816 		if (pm_start(dip) != DDI_SUCCESS) {
817 			return (0);
818 		}
819 		info = PM_GET_PM_INFO(dip);
820 	}
821 	ASSERT(info);
822 	if (infop != NULL)
823 		*infop = info;
824 	return (1);
825 }
826 
827 int
828 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
829 {
830 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
831 		if (cpp != NULL)
832 			*cpp = PM_CP(dip, cmpt);
833 		return (1);
834 	} else {
835 		return (0);
836 	}
837 }
838 
839 /*
840  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
841  */
842 static int
843 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
844 {
845 	PMD_FUNC(pmf, "din")
846 	pm_component_t *cp;
847 	char *pathbuf;
848 	int result;
849 
850 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
851 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
852 	    !e_pm_valid_power(dip, cmpt, level))
853 		return (DDI_FAILURE);
854 
855 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
856 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
857 	    level, cur_power(cp)))
858 
859 	if (pm_set_power(dip, cmpt, level,  direction,
860 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
861 		if (direction == PM_LEVEL_UPONLY) {
862 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
863 			(void) ddi_pathname(dip, pathbuf);
864 			cmn_err(CE_WARN, "Device %s failed to power up.",
865 			    pathbuf);
866 			kmem_free(pathbuf, MAXPATHLEN);
867 		}
868 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
869 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
870 		    pm_decode_direction(direction), level, result))
871 		return (DDI_FAILURE);
872 	}
873 
874 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
875 	    PM_DEVICE(dip)))
876 	pm_rescan(dip);
877 	return (DDI_SUCCESS);
878 }
879 
/*
 * We can get multiple pm_rescan() threads, if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t * of the device to rescan; it is passed as void *
 * because pm_rescan() is also installed as a timeout() callback below.
 * All scan state (ps_scan_flags, ps_scan_id) is examined and modified with
 * PM_LOCK_DIP held.
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/* nothing to do if scanning is off, not set up, or being stopped */
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is already running; ask it to go around once more */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it since we are about
		 * to scan (or reschedule) now.  The id is cleared under the
		 * lock, but the lock is dropped across untimeout() --
		 * presumably because the callback is pm_rescan() itself,
		 * which takes this same lock.
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have
		 * scheduled a timeout in the meantime; cancel that one too.
		 */
		if (scanp->ps_scan_id) {
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			/*
			 * If yet another timeout appeared while the lock was
			 * dropped, leave it in place and let it do the work.
			 */
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/* retry later: sooner if idledown is in effect */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (pm_min_scan * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* pm_scan() clears this flag once it starts running */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
968 
/*
 * Scan worker, dispatched onto system_taskq by pm_rescan().  arg is the
 * dev_info_t * of the device to scan.
 *
 * Marks the device PM_SCANNING, runs pm_scan_dev() (repeating for as long as
 * some other thread sets PM_SCAN_AGAIN meanwhile), and then, unless
 * pm_scan_dev() returned LONG_MAX, schedules the next pm_rescan() via
 * timeout().  PM_SCAN_DISPATCHED is cleared on every path once this function
 * has looked at the scan state.  The scan flags are only touched with
 * PM_LOCK_DIP held; the lock is dropped around pm_scan_dev().
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/* scanning disabled or being stopped: just drop our request flags */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread already scanning do another pass for us */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * Scan with the lock dropped; repeat if anyone requested another
	 * pass (PM_SCAN_AGAIN) while we were scanning.
	 */
	do {
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow a long */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* someone else already scheduled a rescan; keep it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() takes ticks */
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1048 
1049 void
1050 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1051 {
1052 	int components = PM_NUMCMPTS(dip);
1053 	int i;
1054 
1055 	ASSERT(components > 0);
1056 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1057 	for (i = 0; i < components; i++) {
1058 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1059 	}
1060 	PM_UNLOCK_BUSY(dip);
1061 }
1062 
1063 /*
1064  * Returns true if device needs to be kept up because it exported the
1065  * "no-involuntary-power-cycles" property or we're pretending it did (console
1066  * fb case) or it is an ancestor of such a device and has used up the "one
1067  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1068  * upon detach
1069  */
1070 int
1071 pm_noinvol(dev_info_t *dip)
1072 {
1073 	PMD_FUNC(pmf, "noinvol")
1074 
1075 	/*
1076 	 * This doesn't change over the life of a driver, so no locking needed
1077 	 */
1078 	if (PM_IS_CFB(dip)) {
1079 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1080 		    pmf, PM_DEVICE(dip)))
1081 		return (1);
1082 	}
1083 	/*
1084 	 * Not an issue if no such kids
1085 	 */
1086 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1087 #ifdef DEBUG
1088 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1089 			dev_info_t *pdip = dip;
1090 			do {
1091 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1092 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1093 				    DEVI(pdip)->devi_pm_noinvolpm,
1094 				    DEVI(pdip)->devi_pm_volpmd))
1095 				pdip = ddi_get_parent(pdip);
1096 			} while (pdip);
1097 		}
1098 #endif
1099 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1100 		return (0);
1101 	}
1102 
1103 	/*
1104 	 * Since we now maintain the counts correct at every node, we no longer
1105 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1106 	 * without the children getting their counts adjusted.
1107 	 */
1108 
1109 #ifdef	DEBUG
1110 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1111 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1112 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1113 		    PM_DEVICE(dip)))
1114 #endif
1115 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1116 }
1117 
/*
 * This function performs the actual scanning of the device.
 * It attempts to power off the indicated device's components if they have
 * been idle and other restrictions are met.
 * pm_scan_dev calculates and returns when the next scan should happen for
 * this device.
 *
 * Return value (in seconds; the caller, pm_scan(), multiplies by hz):
 *	pm_min_scan	the device is still attaching
 *	LONG_MAX	no scan is allowed right now, or every component is
 *			already at its lowest level (no rescan needed)
 *	1		the parent could not be entered without blocking
 *	otherwise	the smallest per-component wait computed below
 */
time_t
pm_scan_dev(dev_info_t *dip)
{
	PMD_FUNC(pmf, "scan_dev")
	pm_scan_t	*scanp;
	time_t		*timestamp, idletime, now, thresh;
	time_t		timeleft = 0;	/* 0 means "nothing computed yet" */
#ifdef PMDDEBUG
	int		curpwr;
#endif
	int		i, nxtpwr, pwrndx, unused;
	size_t		size;
	pm_component_t	 *cp;
	dev_info_t	*pdip = ddi_get_parent(dip);
	int		circ;
	static int	cur_threshold(dev_info_t *, int);
	static int	pm_next_lower_power(pm_component_t *, int);

	/*
	 * skip attaching device
	 */
	if (DEVI_IS_ATTACHING(dip)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
		    pmf, PM_DEVICE(dip), pm_min_scan))
		return (pm_min_scan);
	}

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
	    PM_KUC(dip)))

	/* no scan under the following conditions */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
	    (PM_KUC(dip) != 0) ||
	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
		PM_UNLOCK_DIP(dip);
		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
		    "kuc(%d), %s directpm, %s pm_noinvol\n",
		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
		    cpupm, PM_KUC(dip),
		    PM_ISDIRECT(dip) ? "is" : "is not",
		    pm_noinvol(dip) ? "is" : "is not"))
		return (LONG_MAX);
	}
	PM_UNLOCK_DIP(dip);

	/* don't block on the parent; ask to be called again in 1 second */
	if (!ndi_devi_tryenter(pdip, &circ)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
		    pmf, PM_DEVICE(pdip)))
		return ((time_t)1);
	}
	/* take a snapshot of every component's timestamp */
	now = gethrestime_sec();
	size = PM_NUMCMPTS(dip) * sizeof (time_t);
	timestamp = kmem_alloc(size, KM_SLEEP);
	pm_get_timestamps(dip, timestamp);

	/*
	 * Since we removed support for backwards compatible devices,
	 * (see big comment at top of file)
	 * it is no longer required to deal with component 0 last.
	 */
	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
		/*
		 * If already off (an optimization, perhaps)
		 */
		cp = PM_CP(dip, i);
		pwrndx = cp->pmc_cur_pwr;
#ifdef PMDDEBUG
		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
		    PM_LEVEL_UNKNOWN :
		    cp->pmc_comp.pmc_lvals[pwrndx];
#endif

		if (pwrndx == 0) {
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
			    "lowest\n", pmf, PM_DEVICE(dip), i))
			/* skip device if off or at its lowest */
			continue;
		}

		thresh = cur_threshold(dip, i);		/* comp i threshold */
		/*
		 * A zero timestamp or non-zero busycount is treated as busy:
		 * look again after the threshold, but never sooner than
		 * pm_min_scan.
		 */
		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
			/* were busy or newly became busy by another thread */
			if (timeleft == 0)
				timeleft = max(thresh, pm_min_scan);
			else
				timeleft = min(
				    timeleft, max(thresh, pm_min_scan));
			continue;
		}

		idletime = now - timestamp[i];		/* idle time */
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
		    pmf, PM_DEVICE(dip), i, idletime))
		if (idletime >= thresh || PM_IS_PID(dip)) {
			/* idle long enough: step down one power level */
			nxtpwr = pm_next_lower_power(cp, pwrndx);
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
			/*
			 * A failed pm_set_power() only counts as a failure if
			 * the component did not end up at nxtpwr anyway.
			 */
			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
			    PM_CURPOWER(dip, i) != nxtpwr) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
				    i, curpwr, nxtpwr))
				/*
				 * NOTE(review): this overwrites timeleft
				 * rather than taking min() with any value
				 * computed for earlier components -- confirm
				 * that is intended.
				 */
				timeleft = pm_min_scan;
				continue;
			} else {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d, GOOD curpwr %d\n", pmf,
				    PM_DEVICE(dip), i, curpwr, nxtpwr,
				    cur_power(cp)))

				if (nxtpwr == 0)	/* component went off */
					continue;

				/*
				 * scan to next lower level
				 */
				if (timeleft == 0)
					timeleft = max(
					    1, cur_threshold(dip, i));
				else
					timeleft = min(timeleft,
					    max(1, cur_threshold(dip, i)));
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
				    i, timeleft))
			}
		} else {	/* comp not idle long enough */
			/* come back when the remaining idle time has passed */
			if (timeleft == 0)
				timeleft = thresh - idletime;
			else
				timeleft = min(timeleft, (thresh - idletime));
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
		}
	}
	ndi_devi_exit(pdip, circ);
	kmem_free(timestamp, size);
	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
	    PM_DEVICE(dip), timeleft))

	/*
	 * if components are already at lowest level, timeleft is left 0
	 */
	return ((timeleft == 0) ? LONG_MAX : timeleft);
}
1278 
1279 /*
1280  * pm_scan_stop - cancel scheduled pm_rescan,
1281  *                wait for termination of dispatched pm_scan thread
1282  *                     and active pm_scan_dev thread.
1283  */
1284 void
1285 pm_scan_stop(dev_info_t *dip)
1286 {
1287 	PMD_FUNC(pmf, "scan_stop")
1288 	pm_scan_t	*scanp;
1289 	timeout_id_t	scanid;
1290 
1291 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1292 	PM_LOCK_DIP(dip);
1293 	scanp = PM_GET_PM_SCAN(dip);
1294 	if (!scanp) {
1295 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1296 		    pmf, PM_DEVICE(dip)))
1297 		PM_UNLOCK_DIP(dip);
1298 		return;
1299 	}
1300 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1301 
1302 	/* cancel scheduled scan taskq */
1303 	while (scanp->ps_scan_id) {
1304 		scanid = scanp->ps_scan_id;
1305 		scanp->ps_scan_id = 0;
1306 		PM_UNLOCK_DIP(dip);
1307 		(void) untimeout(scanid);
1308 		PM_LOCK_DIP(dip);
1309 	}
1310 
1311 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1312 		PM_UNLOCK_DIP(dip);
1313 		delay(1);
1314 		PM_LOCK_DIP(dip);
1315 	}
1316 	PM_UNLOCK_DIP(dip);
1317 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1318 }
1319 
1320 int
1321 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1322 {
1323 	_NOTE(ARGUNUSED(arg))
1324 
1325 	if (!PM_GET_PM_SCAN(dip))
1326 		return (DDI_WALK_CONTINUE);
1327 	ASSERT(!PM_ISBC(dip));
1328 	pm_scan_stop(dip);
1329 	return (DDI_WALK_CONTINUE);
1330 }
1331 
1332 /*
1333  * Converts a power level value to its index
1334  */
1335 static int
1336 power_val_to_index(pm_component_t *cp, int val)
1337 {
1338 	int limit, i, *ip;
1339 
1340 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1341 	    val != PM_LEVEL_EXACT);
1342 	/*  convert power value into index (i) */
1343 	limit = cp->pmc_comp.pmc_numlevels;
1344 	ip = cp->pmc_comp.pmc_lvals;
1345 	for (i = 0; i < limit; i++)
1346 		if (val == *ip++)
1347 			return (i);
1348 	return (-1);
1349 }
1350 
1351 /*
1352  * Converts a numeric power level to a printable string
1353  */
1354 static char *
1355 power_val_to_string(pm_component_t *cp, int val)
1356 {
1357 	int index;
1358 
1359 	if (val == PM_LEVEL_UPONLY)
1360 		return ("<UPONLY>");
1361 
1362 	if (val == PM_LEVEL_UNKNOWN ||
1363 	    (index = power_val_to_index(cp, val)) == -1)
1364 		return ("<LEVEL_UNKNOWN>");
1365 
1366 	return (cp->pmc_comp.pmc_lnames[index]);
1367 }
1368 
1369 /*
1370  * Return true if this node has been claimed by a ppm.
1371  */
1372 static int
1373 pm_ppm_claimed(dev_info_t *dip)
1374 {
1375 	return (PPM(dip) != NULL);
1376 }
1377 
1378 /*
1379  * A node which was voluntarily power managed has just used up its "free cycle"
1380  * and need is volpmd field cleared, and the same done to all its descendents
1381  */
1382 static void
1383 pm_clear_volpm_dip(dev_info_t *dip)
1384 {
1385 	PMD_FUNC(pmf, "clear_volpm_dip")
1386 
1387 	if (dip == NULL)
1388 		return;
1389 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1390 	    PM_DEVICE(dip)))
1391 	DEVI(dip)->devi_pm_volpmd = 0;
1392 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1393 		pm_clear_volpm_dip(dip);
1394 	}
1395 }
1396 
1397 /*
1398  * A node which was voluntarily power managed has used up the "free cycles"
1399  * for the subtree that it is the root of.  Scan through the list of detached
1400  * nodes and adjust the counts of any that are descendents of the node.
1401  */
1402 static void
1403 pm_clear_volpm_list(dev_info_t *dip)
1404 {
1405 	PMD_FUNC(pmf, "clear_volpm_list")
1406 	char	*pathbuf;
1407 	size_t	len;
1408 	pm_noinvol_t *ip;
1409 
1410 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1411 	(void) ddi_pathname(dip, pathbuf);
1412 	len = strlen(pathbuf);
1413 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1414 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1415 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1416 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1417 		    ip->ni_path))
1418 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1419 		    ip->ni_path[len] == '/') {
1420 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1421 			    ip->ni_path))
1422 			ip->ni_volpmd = 0;
1423 			ip->ni_wasvolpmd = 0;
1424 		}
1425 	}
1426 	kmem_free(pathbuf, MAXPATHLEN);
1427 	rw_exit(&pm_noinvol_rwlock);
1428 }
1429 
1430 /*
1431  * Powers a device, suspending or resuming the driver if it is a backward
1432  * compatible device, calling into ppm to change power level.
1433  * Called with the component's power lock held.
1434  */
1435 static int
1436 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1437     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1438 {
1439 	PMD_FUNC(pmf, "power_dev")
1440 	power_req_t power_req;
1441 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1442 	int		resume_needed = 0;
1443 	int		suspended = 0;
1444 	int		result;
1445 #ifdef PMDDEBUG
1446 	struct pm_component *cp = PM_CP(dip, comp);
1447 #endif
1448 	int		bc = PM_ISBC(dip);
1449 	int pm_all_components_off(dev_info_t *);
1450 	int		clearvolpmd = 0;
1451 	char		pathbuf[MAXNAMELEN];
1452 #ifdef PMDDEBUG
1453 	char *ppmname, *ppmaddr;
1454 #endif
1455 	/*
1456 	 * If this is comp 0 of a backwards compat device and we are
1457 	 * going to take the power away, we need to detach it with
1458 	 * DDI_PM_SUSPEND command.
1459 	 */
1460 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1461 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1462 			/* We could not suspend before turning cmpt zero off */
1463 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1464 			    pmf, PM_DEVICE(dip)))
1465 			return (DDI_FAILURE);
1466 		} else {
1467 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1468 			suspended++;
1469 		}
1470 	}
1471 	power_req.request_type = PMR_PPM_SET_POWER;
1472 	power_req.req.ppm_set_power_req.who = dip;
1473 	power_req.req.ppm_set_power_req.cmpt = comp;
1474 	power_req.req.ppm_set_power_req.old_level = old_level;
1475 	power_req.req.ppm_set_power_req.new_level = level;
1476 	power_req.req.ppm_set_power_req.canblock = canblock;
1477 	power_req.req.ppm_set_power_req.cookie = NULL;
1478 #ifdef PMDDEBUG
1479 	if (pm_ppm_claimed(dip)) {
1480 		ppmname = PM_NAME(PPM(dip));
1481 		ppmaddr = PM_ADDR(PPM(dip));
1482 
1483 	} else {
1484 		ppmname = "noppm";
1485 		ppmaddr = "0";
1486 	}
1487 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1488 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1489 	    power_val_to_string(cp, old_level), old_level,
1490 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1491 #endif
1492 	/*
1493 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1494 	 * bc device comp 0 is powering on, then we count it as a power cycle
1495 	 * against its voluntary count.
1496 	 */
1497 	if (DEVI(dip)->devi_pm_volpmd &&
1498 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1499 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1500 		clearvolpmd = 1;
1501 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1502 	    &power_req, &result)) == DDI_SUCCESS) {
1503 		/*
1504 		 * Now do involuntary pm accounting;  If we've just cycled power
1505 		 * on a voluntarily pm'd node, and by inference on its entire
1506 		 * subtree, we need to set the subtree (including those nodes
1507 		 * already detached) volpmd counts to 0, and subtract out the
1508 		 * value of the current node's volpmd count from the ancestors
1509 		 */
1510 		if (clearvolpmd) {
1511 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1512 			pm_clear_volpm_dip(dip);
1513 			pm_clear_volpm_list(dip);
1514 			if (volpmd) {
1515 				(void) ddi_pathname(dip, pathbuf);
1516 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1517 				    volpmd, 0, pathbuf, dip);
1518 			}
1519 		}
1520 	} else {
1521 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1522 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1523 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1524 	}
1525 	/*
1526 	 * If some other devices were also powered up (e.g. other cpus in
1527 	 * the same domain) return a pointer to that list
1528 	 */
1529 	if (devlist) {
1530 		*devlist = (pm_ppm_devlist_t *)
1531 		    power_req.req.ppm_set_power_req.cookie;
1532 	}
1533 	/*
1534 	 * We will have to resume the device if the device is backwards compat
1535 	 * device and either of the following is true:
1536 	 * -This is comp 0 and we have successfully powered it up
1537 	 * -This is comp 0 and we have failed to power it down. Resume is
1538 	 *  needed because we have suspended it above
1539 	 */
1540 
1541 	if (bc && comp == 0) {
1542 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1543 		if (power_op_ret == DDI_SUCCESS) {
1544 			if (POWERING_ON(old_level, level)) {
1545 				/*
1546 				 * It must be either suspended or resumed
1547 				 * via pm_power_has_changed path
1548 				 */
1549 				ASSERT((DEVI(dip)->devi_pm_flags &
1550 				    PMC_SUSPENDED) ||
1551 				    (PM_CP(dip, comp)->pmc_flags &
1552 				    PM_PHC_WHILE_SET_POWER));
1553 
1554 					resume_needed = suspended;
1555 			}
1556 		} else {
1557 			if (POWERING_OFF(old_level, level)) {
1558 				/*
1559 				 * It must be either suspended or resumed
1560 				 * via pm_power_has_changed path
1561 				 */
1562 				ASSERT((DEVI(dip)->devi_pm_flags &
1563 				    PMC_SUSPENDED) ||
1564 				    (PM_CP(dip, comp)->pmc_flags &
1565 				    PM_PHC_WHILE_SET_POWER));
1566 
1567 					resume_needed = suspended;
1568 			}
1569 		}
1570 	}
1571 	if (resume_needed) {
1572 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1573 		/* ppm is not interested in DDI_PM_RESUME */
1574 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1575 		    DDI_SUCCESS) {
1576 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1577 		} else
1578 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1579 			    PM_DEVICE(dip));
1580 	}
1581 	return (power_op_ret);
1582 }
1583 
1584 /*
1585  * Return true if we are the owner or a borrower of the devi lock.  See
1586  * pm_lock_power_single() about borrowing the lock.
1587  */
1588 static int
1589 pm_devi_lock_held(dev_info_t *dip)
1590 {
1591 	lock_loan_t *cur;
1592 
1593 	if (DEVI_BUSY_OWNED(dip))
1594 	    return (1);
1595 
1596 	/* return false if no locks borrowed */
1597 	if (lock_loan_head.pmlk_next == NULL)
1598 		return (0);
1599 
1600 	mutex_enter(&pm_loan_lock);
1601 	/* see if our thread is registered as a lock borrower. */
1602 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1603 		if (cur->pmlk_borrower == curthread)
1604 			break;
1605 	mutex_exit(&pm_loan_lock);
1606 
1607 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1608 }
1609 
1610 /*
1611  * pm_set_power: adjusts power level of device.	 Assumes device is power
1612  * manageable & component exists.
1613  *
1614  * Cases which require us to bring up devices we keep up ("wekeepups") for
1615  * backwards compatible devices:
1616  *	component 0 is off and we're bringing it up from 0
1617  *		bring up wekeepup first
1618  *	and recursively when component 0 is off and we bring some other
1619  *	component up from 0
1620  * For devices which are not backward compatible, our dependency notion is much
1621  * simpler.  Unless all components are off, then wekeeps must be on.
1622  * We don't treat component 0 differently.
1623  * Canblock tells how to deal with a direct pm'd device.
1624  * Scan arg tells us if we were called from scan, in which case we don't need
1625  * to go back to the root node and walk down to change power.
1626  */
1627 int
1628 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1629     pm_canblock_t canblock, int scan, int *retp)
1630 {
1631 	PMD_FUNC(pmf, "set_power")
1632 	char		*pathbuf;
1633 	pm_bp_child_pwrchg_t bpc;
1634 	pm_sp_misc_t	pspm;
1635 	int		ret = DDI_SUCCESS;
1636 	int		unused = DDI_SUCCESS;
1637 	dev_info_t	*pdip = ddi_get_parent(dip);
1638 
1639 #ifdef DEBUG
1640 	int		diverted = 0;
1641 
1642 	/*
1643 	 * This prevents operations on the console from calling prom_printf and
1644 	 * either deadlocking or bringing up the console because of debug
1645 	 * output
1646 	 */
1647 	if (dip == cfb_dip) {
1648 		diverted++;
1649 		mutex_enter(&pm_debug_lock);
1650 		pm_divertdebug++;
1651 		mutex_exit(&pm_debug_lock);
1652 	}
1653 #endif
1654 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1655 	    direction == PM_LEVEL_EXACT);
1656 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1657 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1658 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1659 	(void) ddi_pathname(dip, pathbuf);
1660 	bpc.bpc_dip = dip;
1661 	bpc.bpc_path = pathbuf;
1662 	bpc.bpc_comp = comp;
1663 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1664 	bpc.bpc_nlevel = level;
1665 	pspm.pspm_direction = direction;
1666 	pspm.pspm_errnop = retp;
1667 	pspm.pspm_canblock = canblock;
1668 	pspm.pspm_scan = scan;
1669 	bpc.bpc_private = &pspm;
1670 
1671 	/*
1672 	 * If a config operation is being done (we've locked the parent) or
1673 	 * we already hold the power lock (we've locked the node)
1674 	 * then we can operate directly on the node because we have already
1675 	 * brought up all the ancestors, otherwise, we have to go back to the
1676 	 * top of the tree.
1677 	 */
1678 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1679 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1680 		    (void *)&bpc, (void *)&unused);
1681 	else
1682 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1683 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1684 #ifdef DEBUG
1685 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1686 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1687 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1688 	}
1689 	if (diverted) {
1690 		mutex_enter(&pm_debug_lock);
1691 		pm_divertdebug--;
1692 		mutex_exit(&pm_debug_lock);
1693 	}
1694 #endif
1695 	kmem_free(pathbuf, MAXPATHLEN);
1696 	return (ret);
1697 }
1698 
/*
 * Looks up the attached devinfo node named by an absolute device pathname.
 *
 * If holddip is set, then if a dip is found we return with the node held.
 *
 * This code uses the same locking scheme as e_ddi_hold_devi_by_path
 * (resolve_pathname), but it does not drive attach.
 *
 * Returns NULL if pathname is NULL or does not begin with '/', if pn_get()
 * fails, or if any component of the path is not found or is not attached.
 * Otherwise returns the node for the last component (the root node if there
 * are no components after the leading '/').
 */
dev_info_t *
pm_name_to_dip(char *pathname, int holddip)
{
	struct pathname pn;
	char		*component;
	dev_info_t	*parent, *child;
	int		circ;

	if ((pathname == NULL) || (*pathname != '/'))
		return (NULL);

	/* setup pathname and allocate component */
	if (pn_get(pathname, UIO_SYSSPACE, &pn))
		return (NULL);
	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);

	/* start at top, process '/' component */
	parent = child = ddi_root_node();
	ndi_hold_devi(parent);
	pn_skipslash(&pn);
	ASSERT(i_ddi_devi_attached(parent));

	/* process components of pathname */
	while (pn_pathleft(&pn)) {
		(void) pn_getcomponent(&pn, component);

		/* enter parent and search for component child */
		ndi_devi_enter(parent, &circ);
		child = ndi_devi_findchild(parent, component);
		if ((child == NULL) || !i_ddi_devi_attached(child)) {
			/* lookup failed: drop the parent and give up */
			child = NULL;
			ndi_devi_exit(parent, circ);
			ndi_rele_devi(parent);
			goto out;
		}

		/* attached child found, hold child and release parent */
		ndi_hold_devi(child);
		ndi_devi_exit(parent, circ);
		ndi_rele_devi(parent);

		/* child becomes parent, and process next component */
		parent = child;
		pn_skipslash(&pn);

		/* loop with active ndi_devi_hold of child->parent */
	}

out:
	/* here child is either NULL or the held node that was found */
	pn_free(&pn);
	kmem_free(component, MAXNAMELEN);

	/* if we are not asked to return with hold, drop current hold */
	if (child && !holddip)
		ndi_rele_devi(child);
	return (child);
}
1762 
1763 /*
1764  * Search for a dependency and mark it unsatisfied
1765  */
1766 static void
1767 pm_unsatisfy(char *keeper, char *kept)
1768 {
1769 	PMD_FUNC(pmf, "unsatisfy")
1770 	pm_pdr_t *dp;
1771 
1772 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1773 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1774 		if (!dp->pdr_isprop) {
1775 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1776 			    (dp->pdr_kept_count > 0) &&
1777 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1778 				if (dp->pdr_satisfied) {
1779 					dp->pdr_satisfied = 0;
1780 					pm_unresolved_deps++;
1781 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1782 					    "pm_unresolved_deps now %d\n", pmf,
1783 					    pm_unresolved_deps))
1784 				}
1785 			}
1786 		}
1787 	}
1788 }
1789 
1790 /*
1791  * Device dip is being un power managed, it keeps up count other devices.
1792  * We need to release any hold we have on the kept devices, and also
1793  * mark the dependency no longer satisfied.
1794  */
1795 static void
1796 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1797 {
1798 	PMD_FUNC(pmf, "unkeeps")
1799 	int i, j;
1800 	dev_info_t *kept;
1801 	dev_info_t *dip;
1802 	struct pm_component *cp;
1803 	int keeper_on = 0, circ;
1804 
1805 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1806 	    keeper, (void *)keptpaths))
1807 	/*
1808 	 * Try to grab keeper. Keeper may have gone away by now,
1809 	 * in this case, used the passed in value pwr
1810 	 */
1811 	dip = pm_name_to_dip(keeper, 1);
1812 	for (i = 0; i < count; i++) {
1813 		/* Release power hold */
1814 		kept = pm_name_to_dip(keptpaths[i], 1);
1815 		if (kept) {
1816 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1817 			    PM_DEVICE(kept), i))
1818 			/*
1819 			 * We need to check if we skipped a bringup here
1820 			 * because we could have failed the bringup
1821 			 * (ie DIRECT PM device) and have
1822 			 * not increment the count.
1823 			 */
1824 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1825 				keeper_on = 0;
1826 				PM_LOCK_POWER(dip, &circ);
1827 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1828 				    cp = &DEVI(dip)->devi_pm_components[j];
1829 					if (cur_power(cp)) {
1830 						keeper_on++;
1831 						break;
1832 					}
1833 				}
1834 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1835 					pm_rele_power(kept);
1836 					DEVI(kept)->devi_pm_flags
1837 						&= ~PMC_SKIP_BRINGUP;
1838 				}
1839 				PM_UNLOCK_POWER(dip, circ);
1840 			} else if (pwr) {
1841 				if (PM_SKBU(kept) == 0) {
1842 					pm_rele_power(kept);
1843 					DEVI(kept)->devi_pm_flags
1844 					    &= ~PMC_SKIP_BRINGUP;
1845 				}
1846 			}
1847 			ddi_release_devi(kept);
1848 		}
1849 		/*
1850 		 * mark this dependency not satisfied
1851 		 */
1852 		pm_unsatisfy(keeper, keptpaths[i]);
1853 	}
1854 	if (dip)
1855 		ddi_release_devi(dip);
1856 }
1857 
/*
 * Device kept is being un power managed, it is kept up by keeper.
 * We need to mark the dependency no longer satisfied.
 *
 * This is a thin wrapper around pm_unsatisfy().  Note the argument order:
 * this function takes (kept, keeper) while pm_unsatisfy() is called with
 * (keeper, kept).  keeper must not be NULL.
 */
static void
pm_unkepts(char *kept, char *keeper)
{
	PMD_FUNC(pmf, "unkepts")
	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
	ASSERT(keeper != NULL);
	/*
	 * mark this dependency not satisfied
	 */
	pm_unsatisfy(keeper, kept);
}
1873 
1874 /*
1875  * Removes dependency information and hold on the kepts, if the path is a
1876  * path of a keeper.
1877  */
1878 static void
1879 pm_free_keeper(char *path, int pwr)
1880 {
1881 	pm_pdr_t *dp;
1882 	int i;
1883 	size_t length;
1884 
1885 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1886 		if (strcmp(dp->pdr_keeper, path) != 0)
1887 			continue;
1888 		/*
1889 		 * Remove all our kept holds and the dependency records,
1890 		 * then free up the kept lists.
1891 		 */
1892 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1893 		if (dp->pdr_kept_count)  {
1894 			for (i = 0; i < dp->pdr_kept_count; i++) {
1895 				length = strlen(dp->pdr_kept_paths[i]);
1896 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1897 			}
1898 			kmem_free(dp->pdr_kept_paths,
1899 			    dp->pdr_kept_count * sizeof (char **));
1900 			dp->pdr_kept_paths = NULL;
1901 			dp->pdr_kept_count = 0;
1902 		}
1903 	}
1904 }
1905 
1906 /*
1907  * Removes the device represented by path from the list of kepts, if the
1908  * path is a path of a kept
1909  */
1910 static void
1911 pm_free_kept(char *path)
1912 {
1913 	pm_pdr_t *dp;
1914 	int i;
1915 	int j, count;
1916 	size_t length;
1917 	char **paths;
1918 
1919 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1920 		if (dp->pdr_kept_count == 0)
1921 			continue;
1922 		count = dp->pdr_kept_count;
1923 		/* Remove this device from the kept path lists */
1924 		for (i = 0; i < count; i++) {
1925 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1926 				pm_unkepts(path, dp->pdr_keeper);
1927 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1928 				kmem_free(dp->pdr_kept_paths[i], length);
1929 				dp->pdr_kept_paths[i] = NULL;
1930 				dp->pdr_kept_count--;
1931 			}
1932 		}
1933 		/* Compact the kept paths array */
1934 		if (dp->pdr_kept_count) {
1935 			length = dp->pdr_kept_count * sizeof (char **);
1936 			paths = kmem_zalloc(length, KM_SLEEP);
1937 			j = 0;
1938 			for (i = 0; i < count; i++) {
1939 				if (dp->pdr_kept_paths[i] != NULL) {
1940 					paths[j] = dp->pdr_kept_paths[i];
1941 					j++;
1942 				}
1943 			}
1944 			ASSERT(j == dp->pdr_kept_count);
1945 		}
1946 		/* Now free the old array and point to the new one */
1947 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1948 		if (dp->pdr_kept_count)
1949 			dp->pdr_kept_paths = paths;
1950 		else
1951 			dp->pdr_kept_paths = NULL;
1952 	}
1953 }
1954 
/*
 * Free the dependency information for a device.
 *
 * path names the device.  It is treated first as a possible keeper and
 * then as a possible kept device; both helpers are no-ops for records that
 * do not mention path.  pwr is handed through to pm_free_keeper().
 *
 * In DEBUG kernels with PMD_KEEPS set in pm_debug, prdeps() is called
 * before and after the cleanup.
 */
void
pm_free_keeps(char *path, int pwr)
{
	PMD_FUNC(pmf, "free_keeps")

#ifdef DEBUG
	int doprdeps = 0;
	void prdeps(char *);

	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
	if (pm_debug & PMD_KEEPS) {
		doprdeps = 1;
		prdeps("pm_free_keeps before");
	}
#endif
	/*
	 * First assume we are a keeper and remove all our kepts.
	 */
	pm_free_keeper(path, pwr);
	/*
	 * Now assume we are a kept device, and remove all our records.
	 */
	pm_free_kept(path);
#ifdef	DEBUG
	if (doprdeps) {
		prdeps("pm_free_keeps after");
	}
#endif
}
1987 
1988 static int
1989 pm_is_kept(char *path)
1990 {
1991 	pm_pdr_t *dp;
1992 	int i;
1993 
1994 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1995 		if (dp->pdr_kept_count == 0)
1996 			continue;
1997 		for (i = 0; i < dp->pdr_kept_count; i++) {
1998 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
1999 				return (1);
2000 		}
2001 	}
2002 	return (0);
2003 }
2004 
2005 static void
2006 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2007 {
2008 	PMD_FUNC(pmf, "hold_rele_power")
2009 	int circ;
2010 
2011 	if ((dip == NULL) ||
2012 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2013 		return;
2014 
2015 	PM_LOCK_POWER(dip, &circ);
2016 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2017 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2018 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2019 
2020 	PM_KUC(dip) += cnt;
2021 
2022 	ASSERT(PM_KUC(dip) >= 0);
2023 	PM_UNLOCK_POWER(dip, circ);
2024 
2025 	if (cnt < 0 && PM_KUC(dip) == 0)
2026 		pm_rescan(dip);
2027 }
2028 
/* Sizes the ppm_callbacks[] table below (which has one extra slot) */
#define	MAX_PPM_HANDLERS	4

kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of ppm handlers.  pm_ppm_notify_all_lowest() walks it under
 * ppm_lock from the start until it reaches an entry whose ppmc_func is
 * NULL.  The array has MAX_PPM_HANDLERS + 1 entries, presumably so that a
 * NULL terminator always remains -- TODO confirm against the code that
 * registers handlers.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);	/* handler; NULL ends the table walk */
	dev_info_t	*ppmc_dip;	/* dip passed to pm_ctlops() as target */
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2037 
2038 
2039 /*
2040  * This routine calls into all the registered ppms to notify them
2041  * that either all components of power-managed devices are at their
2042  * lowest levels or no longer all are at their lowest levels.
2043  */
2044 static void
2045 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2046 {
2047 	struct ppm_callbacks *ppmcp;
2048 	power_req_t power_req;
2049 	int result = 0;
2050 
2051 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2052 	power_req.req.ppm_all_lowest_req.mode = mode;
2053 	mutex_enter(&ppm_lock);
2054 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2055 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2056 		    DDI_CTLOPS_POWER, &power_req, &result);
2057 	mutex_exit(&ppm_lock);
2058 }
2059 
/*
 * Store value as the devi_pm_info pointer of dip.  pm_rem_info() passes
 * NULL here to make the device look not power managed.
 */
static void
pm_set_pm_info(dev_info_t *dip, void *value)
{
	DEVI(dip)->devi_pm_info = value;
}
2065 
2066 pm_rsvp_t *pm_blocked_list;
2067 
2068 /*
2069  * Look up an entry in the blocked list by dip and component
2070  */
2071 static pm_rsvp_t *
2072 pm_rsvp_lookup(dev_info_t *dip, int comp)
2073 {
2074 	pm_rsvp_t *p;
2075 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2076 	for (p = pm_blocked_list; p; p = p->pr_next)
2077 		if (p->pr_dip == dip && p->pr_comp == comp) {
2078 			return (p);
2079 		}
2080 	return (NULL);
2081 }
2082 
2083 /*
2084  * Called when a device which is direct power managed (or the parent or
2085  * dependent of such a device) changes power, or when a pm clone is closed
2086  * that was direct power managing a device.  This call results in pm_blocked()
2087  * (below) returning.
2088  */
2089 void
2090 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2091 {
2092 	PMD_FUNC(pmf, "proceed")
2093 	pm_rsvp_t *found = NULL;
2094 	pm_rsvp_t *p;
2095 
2096 	mutex_enter(&pm_rsvp_lock);
2097 	switch (cmd) {
2098 	/*
2099 	 * we're giving up control, let any pending op continue
2100 	 */
2101 	case PMP_RELEASE:
2102 		for (p = pm_blocked_list; p; p = p->pr_next) {
2103 			if (dip == p->pr_dip) {
2104 				p->pr_retval = PMP_RELEASE;
2105 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2106 				    pmf, PM_DEVICE(dip)))
2107 				cv_signal(&p->pr_cv);
2108 			}
2109 		}
2110 		break;
2111 
2112 	/*
2113 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2114 	 * succeed and a non-matching request for the same device fail
2115 	 */
2116 	case PMP_SETPOWER:
2117 		found = pm_rsvp_lookup(dip, comp);
2118 		if (!found)	/* if driver not waiting */
2119 			break;
2120 		/*
2121 		 * This cannot be pm_lower_power, since that can only happen
2122 		 * during detach or probe
2123 		 */
2124 		if (found->pr_newlevel <= newlevel) {
2125 			found->pr_retval = PMP_SUCCEED;
2126 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2127 			    PM_DEVICE(dip)))
2128 		} else {
2129 			found->pr_retval = PMP_FAIL;
2130 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2131 			    PM_DEVICE(dip)))
2132 		}
2133 		cv_signal(&found->pr_cv);
2134 		break;
2135 
2136 	default:
2137 		panic("pm_proceed unknown cmd %d", cmd);
2138 	}
2139 	mutex_exit(&pm_rsvp_lock);
2140 }
2141 
2142 /*
2143  * This routine dispatches new work to the dependency thread. Caller must
2144  * be prepared to block for memory if necessary.
2145  */
2146 void
2147 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2148     int *res, int cached_pwr)
2149 {
2150 	pm_dep_wk_t	*new_work;
2151 
2152 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2153 	new_work->pdw_type = cmd;
2154 	new_work->pdw_wait = wait;
2155 	new_work->pdw_done = 0;
2156 	new_work->pdw_ret = 0;
2157 	new_work->pdw_pwr = cached_pwr;
2158 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2159 	if (keeper != NULL) {
2160 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2161 		    KM_SLEEP);
2162 		(void) strcpy(new_work->pdw_keeper, keeper);
2163 	}
2164 	if (kept != NULL) {
2165 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2166 		(void) strcpy(new_work->pdw_kept, kept);
2167 	}
2168 	mutex_enter(&pm_dep_thread_lock);
2169 	if (pm_dep_thread_workq == NULL) {
2170 		pm_dep_thread_workq = new_work;
2171 		pm_dep_thread_tail = new_work;
2172 		new_work->pdw_next = NULL;
2173 	} else {
2174 		pm_dep_thread_tail->pdw_next = new_work;
2175 		pm_dep_thread_tail = new_work;
2176 		new_work->pdw_next = NULL;
2177 	}
2178 	cv_signal(&pm_dep_thread_cv);
2179 	/* If caller asked for it, wait till it is done. */
2180 	if (wait)  {
2181 		while (!new_work->pdw_done)
2182 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2183 		/*
2184 		 * Pass return status, if any, back.
2185 		 */
2186 		if (res != NULL)
2187 			*res = new_work->pdw_ret;
2188 		/*
2189 		 * If we asked to wait, it is our job to free the request
2190 		 * structure.
2191 		 */
2192 		if (new_work->pdw_keeper)
2193 			kmem_free(new_work->pdw_keeper,
2194 			    strlen(new_work->pdw_keeper) + 1);
2195 		if (new_work->pdw_kept)
2196 			kmem_free(new_work->pdw_kept,
2197 			    strlen(new_work->pdw_kept) + 1);
2198 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2199 	}
2200 	mutex_exit(&pm_dep_thread_lock);
2201 }
2202 
2203 /*
2204  * Release the pm resource for this device.
2205  */
2206 void
2207 pm_rem_info(dev_info_t *dip)
2208 {
2209 	PMD_FUNC(pmf, "rem_info")
2210 	int		i, count = 0;
2211 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2212 	dev_info_t	*pdip = ddi_get_parent(dip);
2213 	char		*pathbuf;
2214 	int		work_type = PM_DEP_WK_DETACH;
2215 
2216 	ASSERT(info);
2217 
2218 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2219 	if (PM_ISDIRECT(dip)) {
2220 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2221 		ASSERT(info->pmi_clone);
2222 		info->pmi_clone = 0;
2223 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2224 	}
2225 	ASSERT(!PM_GET_PM_SCAN(dip));
2226 
2227 	/*
2228 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2229 	 * Others we check all components.  BC node that has already
2230 	 * called pm_destroy_components() has zero component count.
2231 	 * Parents that get notification are not adjusted because their
2232 	 * kidsupcnt is always 0 (or 1 during configuration).
2233 	 */
2234 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2235 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2236 
2237 	/* node is detached, so we can examine power without locking */
2238 	if (PM_ISBC(dip)) {
2239 		count = (PM_CURPOWER(dip, 0) != 0);
2240 	} else {
2241 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2242 			count += (PM_CURPOWER(dip, i) != 0);
2243 	}
2244 
2245 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2246 		e_pm_hold_rele_power(pdip, -count);
2247 
2248 	/* Schedule a request to clean up dependency records */
2249 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2250 	(void) ddi_pathname(dip, pathbuf);
2251 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2252 	    PM_DEP_NOWAIT, NULL, (count > 0));
2253 	kmem_free(pathbuf, MAXPATHLEN);
2254 
2255 	/*
2256 	 * Adjust the pm_comps_notlowest count since this device is
2257 	 * not being power-managed anymore.
2258 	 */
2259 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2260 		if (PM_CURPOWER(dip, i) != 0)
2261 			PM_DECR_NOTLOWEST(dip);
2262 	}
2263 	/*
2264 	 * Once we clear the info pointer, it looks like it is not power
2265 	 * managed to everybody else.
2266 	 */
2267 	pm_set_pm_info(dip, NULL);
2268 	kmem_free(info, sizeof (pm_info_t));
2269 }
2270 
2271 int
2272 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2273 {
2274 	int components = PM_NUMCMPTS(dip);
2275 	int *bufp;
2276 	size_t size;
2277 	int i;
2278 
2279 	if (components <= 0) {
2280 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2281 		    "can't get normal power values\n", PM_DEVICE(dip));
2282 		return (DDI_FAILURE);
2283 	} else {
2284 		size = components * sizeof (int);
2285 		bufp = kmem_alloc(size, KM_SLEEP);
2286 		for (i = 0; i < components; i++) {
2287 			bufp[i] = pm_get_normal_power(dip, i);
2288 		}
2289 	}
2290 	*length = size;
2291 	*valuep = bufp;
2292 	return (DDI_SUCCESS);
2293 }
2294 
2295 static int
2296 pm_reset_timestamps(dev_info_t *dip, void *arg)
2297 {
2298 	_NOTE(ARGUNUSED(arg))
2299 
2300 	int components;
2301 	int	i;
2302 
2303 	if (!PM_GET_PM_INFO(dip))
2304 		return (DDI_WALK_CONTINUE);
2305 	components = PM_NUMCMPTS(dip);
2306 	ASSERT(components > 0);
2307 	PM_LOCK_BUSY(dip);
2308 	for (i = 0; i < components; i++) {
2309 		struct pm_component *cp;
2310 		/*
2311 		 * If the component was not marked as busy,
2312 		 * reset its timestamp to now.
2313 		 */
2314 		cp = PM_CP(dip, i);
2315 		if (cp->pmc_timestamp)
2316 			cp->pmc_timestamp = gethrestime_sec();
2317 	}
2318 	PM_UNLOCK_BUSY(dip);
2319 	return (DDI_WALK_CONTINUE);
2320 }
2321 
2322 /*
2323  * Convert a power level to an index into the levels array (or
2324  * just PM_LEVEL_UNKNOWN in that special case).
2325  */
2326 static int
2327 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2328 {
2329 	PMD_FUNC(pmf, "level_to_index")
2330 	int i;
2331 	int limit = cp->pmc_comp.pmc_numlevels;
2332 	int *ip = cp->pmc_comp.pmc_lvals;
2333 
2334 	if (level == PM_LEVEL_UNKNOWN)
2335 		return (level);
2336 
2337 	for (i = 0; i < limit; i++) {
2338 		if (level == *ip++) {
2339 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2340 			    pmf, PM_DEVICE(dip),
2341 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2342 			return (i);
2343 		}
2344 	}
2345 	panic("pm_level_to_index: level %d not found for device "
2346 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2347 	/*NOTREACHED*/
2348 }
2349 
2350 /*
2351  * Internal function to set current power level
2352  */
2353 static void
2354 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2355 {
2356 	PMD_FUNC(pmf, "set_cur_pwr")
2357 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2358 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2359 
2360 	/*
2361 	 * Nothing to adjust if current & new levels are the same.
2362 	 */
2363 	if (curpwr != PM_LEVEL_UNKNOWN &&
2364 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2365 		return;
2366 
2367 	/*
2368 	 * Keep the count for comps doing transition to/from lowest
2369 	 * level.
2370 	 */
2371 	if (curpwr == 0) {
2372 		PM_INCR_NOTLOWEST(dip);
2373 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2374 		PM_DECR_NOTLOWEST(dip);
2375 	}
2376 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2377 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2378 }
2379 
2380 /*
2381  * This is the default method of setting the power of a device if no ppm
2382  * driver has claimed it.
2383  */
2384 int
2385 pm_power(dev_info_t *dip, int comp, int level)
2386 {
2387 	PMD_FUNC(pmf, "power")
2388 	struct dev_ops	*ops;
2389 	int		(*fn)(dev_info_t *, int, int);
2390 	struct pm_component *cp = PM_CP(dip, comp);
2391 	int retval;
2392 	pm_info_t *info = PM_GET_PM_INFO(dip);
2393 	static int pm_phc_impl(dev_info_t *, int, int, int);
2394 
2395 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2396 	    PM_DEVICE(dip), comp, level))
2397 	if (!(ops = ddi_get_driver(dip))) {
2398 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2399 		    PM_DEVICE(dip)))
2400 		return (DDI_FAILURE);
2401 	}
2402 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2403 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2404 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2405 		    (!fn ? " devo_power NULL" : "")))
2406 		return (DDI_FAILURE);
2407 	}
2408 	cp->pmc_flags |= PM_POWER_OP;
2409 	retval = (*fn)(dip, comp, level);
2410 	cp->pmc_flags &= ~PM_POWER_OP;
2411 	if (retval == DDI_SUCCESS) {
2412 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2413 		return (DDI_SUCCESS);
2414 	}
2415 
2416 	/*
2417 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2418 	 * updated only the power level of the component.  If our attempt to
2419 	 * set the device new to a power level above has failed we sync the
2420 	 * total power state via phc code now.
2421 	 */
2422 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2423 		int phc_lvl =
2424 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2425 
2426 		ASSERT(info);
2427 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2428 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2429 			pmf, PM_DEVICE(dip), comp, phc_lvl))
2430 	}
2431 
2432 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2433 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2434 	    level, power_val_to_string(cp, level)));
2435 	return (DDI_FAILURE);
2436 }
2437 
/*
 * Stop power managing dip: send a PMR_PPM_UNMANAGE request to the ppm and
 * then release the device's pm resources with pm_rem_info().
 *
 * Returns the value of the pm_ctlops() call, or 0 when no request was
 * sent.  The caller must not be holding the power lock of dip.
 */
int
pm_unmanage(dev_info_t *dip)
{
	PMD_FUNC(pmf, "unmanage")
	power_req_t power_req;
	int result, retval = 0;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
	    PM_DEVICE(dip)))
	power_req.request_type = PMR_PPM_UNMANAGE;
	power_req.req.ppm_config_req.who = dip;
	/*
	 * In non-DEBUG kernels the request is sent only when a ppm has
	 * claimed the device.  In DEBUG kernels the else arm below sends it
	 * unconditionally.
	 */
	if (pm_ppm_claimed(dip))
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#ifdef DEBUG
	else
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#endif
	/* retval is still its initial 0 when no request was sent */
	ASSERT(retval == DDI_SUCCESS);
	pm_rem_info(dip);
	return (retval);
}
2462 
2463 int
2464 pm_raise_power(dev_info_t *dip, int comp, int level)
2465 {
2466 	if (level < 0)
2467 		return (DDI_FAILURE);
2468 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2469 	    !e_pm_valid_power(dip, comp, level))
2470 		return (DDI_FAILURE);
2471 
2472 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2473 }
2474 
2475 int
2476 pm_lower_power(dev_info_t *dip, int comp, int level)
2477 {
2478 	PMD_FUNC(pmf, "pm_lower_power")
2479 
2480 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2481 	    !e_pm_valid_power(dip, comp, level)) {
2482 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2483 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2484 		return (DDI_FAILURE);
2485 	}
2486 
2487 	if (!DEVI_IS_DETACHING(dip)) {
2488 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2489 		    pmf, PM_DEVICE(dip)))
2490 		return (DDI_FAILURE);
2491 	}
2492 
2493 	/*
2494 	 * If we don't care about saving power, or we're treating this node
2495 	 * specially, then this is a no-op
2496 	 */
2497 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2498 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2499 		    pmf, PM_DEVICE(dip),
2500 		    !autopm_enabled ? "!autopm_enabled " : "",
2501 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2502 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2503 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2504 		return (DDI_SUCCESS);
2505 	}
2506 
2507 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2508 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2509 		    PM_DEVICE(dip)))
2510 		return (DDI_FAILURE);
2511 	}
2512 	return (DDI_SUCCESS);
2513 }
2514 
2515 /*
2516  * Find the entries struct for a given dip in the blocked list, return it locked
2517  */
2518 static psce_t *
2519 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2520 {
2521 	pscc_t *p;
2522 	psce_t *psce;
2523 
2524 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2525 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2526 		if (p->pscc_dip == dip) {
2527 			*psccp = p;
2528 			psce = p->pscc_entries;
2529 			mutex_enter(&psce->psce_lock);
2530 			ASSERT(psce);
2531 			rw_exit(&pm_pscc_direct_rwlock);
2532 			return (psce);
2533 		}
2534 	}
2535 	rw_exit(&pm_pscc_direct_rwlock);
2536 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2537 	/*NOTREACHED*/
2538 }
2539 
/*
 * Write an entry indicating a power level change (to be passed to a process
 * later) in the given psce.
 * If we were called in the path that brings up the console fb in the
 * case of entering the prom, we don't want to sleep.  If the alloc fails, then
 * we create a record that has a size of -1, a physpath of NULL, and that
 * has the overflow flag set.
 *
 * Must be entered with psce->psce_lock held; the lock is dropped before
 * returning.  Returns non-zero if the slot being written still held an
 * unread entry (overrun), 0 otherwise.
 */
static int
psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
    int old, int which, pm_canblock_t canblock)
{
	/*
	 * NOTE(review): other ddi_pathname() callers in this file pass a
	 * MAXPATHLEN-sized buffer; confirm MAXNAMELEN is always enough here.
	 */
	char	buf[MAXNAMELEN];
	pm_state_change_t *p;
	size_t	size;
	caddr_t physpath = NULL;
	int	overrun = 0;

	ASSERT(MUTEX_HELD(&psce->psce_lock));
	(void) ddi_pathname(dip, buf);
	size = strlen(buf) + 1;
	/* p is the slot the new entry goes into */
	p = psce->psce_in;
	if (canblock == PM_CANBLOCK_BYPASS) {
		/* must not sleep on this path, so the alloc may fail */
		physpath = kmem_alloc(size, KM_NOSLEEP);
		if (physpath == NULL) {
			/*
			 * mark current entry as overrun
			 */
			p->flags |= PSC_EVENT_LOST;
			size = (size_t)-1;
		}
	} else
		physpath = kmem_alloc(size, KM_SLEEP);
	if (p->size) {	/* overflow; mark the next entry */
		/* a size of -1 means the old entry has no path to free */
		if (p->size != (size_t)-1)
			kmem_free(p->physpath, p->size);
		ASSERT(psce->psce_out == p);
		/* advance the out pointer past the lost entry, wrapping */
		if (p == psce->psce_last) {
			psce->psce_first->flags |= PSC_EVENT_LOST;
			psce->psce_out = psce->psce_first;
		} else {
			(p + 1)->flags |= PSC_EVENT_LOST;
			psce->psce_out = (p + 1);
		}
		overrun++;
	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
		p->flags |= PSC_EVENT_LOST;
		p->size = 0;
		p->physpath = NULL;
	}
	if (which == PSC_INTEREST) {
		/* tell interested processes whether everything is at lowest */
		mutex_enter(&pm_compcnt_lock);
		if (pm_comps_notlowest == 0)
			p->flags |= PSC_ALL_LOWEST;
		else
			p->flags &= ~PSC_ALL_LOWEST;
		mutex_exit(&pm_compcnt_lock);
	}
	p->event = event;
	p->timestamp = gethrestime_sec();
	p->component = comp;
	p->old_level = old;
	p->new_level = new;
	p->physpath = physpath;
	p->size = size;
	if (physpath != NULL)
		(void) strcpy(p->physpath, buf);
	/* advance the in pointer, wrapping at the end of the buffer */
	if (p == psce->psce_last)
		psce->psce_in = psce->psce_first;
	else
		psce->psce_in = ++p;
	mutex_exit(&psce->psce_lock);
	return (overrun);
}
2614 
2615 /*
2616  * Find the next entry on the interest list.  We keep a pointer to the item we
2617  * last returned in the user's cooke.  Returns a locked entries struct.
2618  */
2619 static psce_t *
2620 psc_interest(void **cookie, pscc_t **psccp)
2621 {
2622 	pscc_t *pscc;
2623 	pscc_t **cookiep = (pscc_t **)cookie;
2624 
2625 	if (*cookiep == NULL)
2626 		pscc = pm_pscc_interest;
2627 	else
2628 		pscc = (*cookiep)->pscc_next;
2629 	if (pscc) {
2630 		*cookiep = pscc;
2631 		*psccp = pscc;
2632 		mutex_enter(&pscc->pscc_entries->psce_lock);
2633 		return (pscc->pscc_entries);
2634 	} else {
2635 		return (NULL);
2636 	}
2637 }
2638 
/*
 * Create an entry for a process to pick up indicating a power level change.
 *
 * cmd is PSC_PENDING_CHANGE (entry goes only to the process directly
 * controlling dip) or PSC_HAS_CHANGED (entry goes to the controlling
 * process, if any and if not bypassing, and to every process on the
 * interest list).  The caller must hold pm_rsvp_lock.
 */
static void
pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
    int newlevel, int oldlevel, pm_canblock_t canblock)
{
	PMD_FUNC(pmf, "enqueue_notify")
	pscc_t	*pscc;
	psce_t	*psce;
	void		*cookie = NULL;
	int	overrun;

	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
	switch (cmd) {
	case PSC_PENDING_CHANGE:	/* only for controlling process */
		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* returns with psce_lock held; psc_entry() drops it */
		psce = pm_psc_dip_to_direct(dip, &pscc);
		ASSERT(psce);
		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
		    pm_poll_cnt[pscc->pscc_clone]))
		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
		    PSC_DIRECT, canblock);
		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
		mutex_enter(&pm_clone_lock);
		/* an overrun replaced an unread entry, so no new one to count */
		if (!overrun)
			pm_poll_cnt[pscc->pscc_clone]++;
		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
		mutex_exit(&pm_clone_lock);
		break;
	case PSC_HAS_CHANGED:
		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* first the controlling process, same protocol as above */
		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
			psce = pm_psc_dip_to_direct(dip, &pscc);
			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
			    pm_poll_cnt[pscc->pscc_clone]))
			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_DIRECT, canblock);
			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
			mutex_enter(&pm_clone_lock);
			if (!overrun)
				pm_poll_cnt[pscc->pscc_clone]++;
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
			mutex_exit(&pm_clone_lock);
		}
		/*
		 * Then every process on the interest list.  psc_interest()
		 * returns each entries struct locked and psc_entry() unlocks
		 * it; pm_poll_cnt is not touched on this path.
		 */
		mutex_enter(&pm_clone_lock);
		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
			(void) psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_INTEREST, canblock);
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		}
		rw_exit(&pm_pscc_interest_rwlock);
		mutex_exit(&pm_clone_lock);
		break;
#ifdef DEBUG
	default:
		ASSERT(0);
#endif
	}
}
2706 
2707 static void
2708 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2709 {
2710 	if (listp) {
2711 		pm_ppm_devlist_t *p, *next = NULL;
2712 
2713 		for (p = *listp; p; p = next) {
2714 			next = p->ppd_next;
2715 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2716 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2717 			    canblock);
2718 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2719 		}
2720 		*listp = NULL;
2721 	}
2722 }
2723 
2724 /*
2725  * Try to get the power locks of the parent node and target (child)
2726  * node.  Return true if successful (with both locks held) or false
2727  * (with no locks held).
2728  */
2729 static int
2730 pm_try_parent_child_locks(dev_info_t *pdip,
2731     dev_info_t *dip, int *pcircp, int *circp)
2732 {
2733 	if (ndi_devi_tryenter(pdip, pcircp))
2734 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2735 			return (1);
2736 		} else {
2737 			ndi_devi_exit(pdip, *pcircp);
2738 		}
2739 	return (0);
2740 }
2741 
2742 /*
2743  * Determine if the power lock owner is blocked by current thread.
2744  * returns :
2745  * 	1 - If the thread owning the effective power lock (the first lock on
2746  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2747  *          a mutex held by the current thread.
2748  *
2749  *	0 - otherwise
2750  *
2751  * Note : This function is called by pm_power_has_changed to determine whether
2752  * it is executing in parallel with pm_set_power.
2753  */
2754 static int
2755 pm_blocked_by_us(dev_info_t *dip)
2756 {
2757 	power_req_t power_req;
2758 	kthread_t *owner;
2759 	int result;
2760 	kmutex_t *mp;
2761 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2762 
2763 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2764 	power_req.req.ppm_power_lock_owner_req.who = dip;
2765 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2766 	    DDI_SUCCESS) {
2767 		/*
2768 		 * It is assumed that if the device is claimed by ppm, ppm
2769 		 * will always implement this request type and it'll always
2770 		 * return success. We panic here, if it fails.
2771 		 */
2772 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2773 		    PM_DEVICE(dip));
2774 		/*NOTREACHED*/
2775 	}
2776 
2777 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2778 	    owner->t_state == TS_SLEEP &&
2779 	    owner->t_sobj_ops &&
2780 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2781 	    (mp = (kmutex_t *)owner->t_wchan) &&
2782 	    mutex_owner(mp) == curthread)
2783 		return (1);
2784 
2785 	return (0);
2786 }
2787 
2788 /*
2789  * Notify parent which wants to hear about a child's power changes.
2790  */
2791 static void
2792 pm_notify_parent(dev_info_t *dip,
2793     dev_info_t *pdip, int comp, int old_level, int level)
2794 {
2795 	pm_bp_has_changed_t bphc;
2796 	pm_sp_misc_t pspm;
2797 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2798 	int result = DDI_SUCCESS;
2799 
2800 	bphc.bphc_dip = dip;
2801 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2802 	bphc.bphc_comp = comp;
2803 	bphc.bphc_olevel = old_level;
2804 	bphc.bphc_nlevel = level;
2805 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2806 	pspm.pspm_scan = 0;
2807 	bphc.bphc_private = &pspm;
2808 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2809 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2810 	kmem_free(pathbuf, MAXPATHLEN);
2811 }
2812 
2813 /*
2814  * Check if we need to resume a BC device, and make the attach call as required.
2815  */
2816 static int
2817 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2818 {
2819 	int ret = DDI_SUCCESS;
2820 
2821 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2822 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2823 		/* ppm is not interested in DDI_PM_RESUME */
2824 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2825 			/* XXX Should we mark it resumed, */
2826 			/* even though it failed? */
2827 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2828 			    PM_NAME(dip), PM_ADDR(dip));
2829 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2830 	}
2831 
2832 	return (ret);
2833 }
2834 
2835 /*
2836  * Tests outside the lock to see if we should bother to enqueue an entry
2837  * for any watching process.  If yes, then caller will take the lock and
2838  * do the full protocol
2839  */
2840 static int
2841 pm_watchers()
2842 {
2843 	if (pm_processes_stopped)
2844 		return (0);
2845 	return (pm_pscc_direct || pm_pscc_interest);
2846 }
2847 
2848 /*
2849  * A driver is reporting that the power of one of its device's components
2850  * has changed.  Update the power state accordingly.
 *
 * dip/comp identify the component, level is the power level it is now at.
 *
 * Returns DDI_FAILURE when level is negative or when any of
 * e_pm_valid_info(), e_pm_valid_comp() or e_pm_valid_power() rejects the
 * arguments.  Returns DDI_SUCCESS directly on the "deadlocked on the same
 * component" path, where only the component's current power index is
 * updated.  On every other path the value returned is that of pm_phc_impl().
2851  */
2852 int
2853 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2854 {
2855 	PMD_FUNC(pmf, "pm_power_has_changed")
2856 	int ret;
2857 	dev_info_t *pdip = ddi_get_parent(dip);
2858 	struct pm_component *cp;
2859 	int blocked, circ, pcirc, old_level;
2860 	static int pm_phc_impl(dev_info_t *, int, int, int);
2861 
2862 	if (level < 0) {
2863 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2864 		    PM_DEVICE(dip), level))
2865 		return (DDI_FAILURE);
2866 	}
2867 
2868 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2869 	    PM_DEVICE(dip), comp, level))
2870 
2871 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2872 	    !e_pm_valid_power(dip, comp, level))
2873 		return (DDI_FAILURE);
2874 
2875 	/*
2876 	 * A driver thread calling pm_power_has_changed and another thread
2877 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2878 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2879 	 * this specific deadlock.  If we can't get the lock immediately
2880 	 * and we are deadlocked, just update the component's level, do
2881 	 * notifications, and return.  We intend to update the total power
2882 	 * state later (if the other thread fails to set power to the
2883 	 * desired level).  If we were called because of a power change on a
2884 	 * component that isn't involved in a set_power op, update all state
2885 	 * immediately.
2886 	 */
	/* cp was already filled in by e_pm_valid_comp(); it is reloaded here */
2887 	cp = PM_CP(dip, comp);
	/* Retry once per delay(1) until both parent and child locks are held */
2888 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2889 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2890 		    (cp->pmc_flags & PM_POWER_OP)) {
			/*
			 * Deadlocked against a power op on this same
			 * component: notify watchers and parent, record the
			 * new index, and return without taking any lock.
			 */
2891 			if (pm_watchers()) {
2892 				mutex_enter(&pm_rsvp_lock);
2893 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2894 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2895 				mutex_exit(&pm_rsvp_lock);
2896 			}
2897 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2898 				pm_notify_parent(dip,
2899 				    pdip, comp, cur_power(cp), level);
2900 			(void) pm_check_and_resume(dip,
2901 			    comp, cur_power(cp), level);
2902 
2903 			/*
2904 			 * Stash the old power index, update curpwr, and flag
2905 			 * that the total power state needs to be synched.
2906 			 */
2907 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2908 			/*
2909 			 * Several pm_power_has_changed calls could arrive
2910 			 * while the set power path remains blocked.  Keep the
2911 			 * oldest old power and the newest new power of any
2912 			 * sequence of phc calls which arrive during deadlock.
2913 			 */
2914 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2915 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2916 			cp->pmc_cur_pwr =
2917 			    pm_level_to_index(dip, cp, level);
2918 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2919 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2920 			return (DDI_SUCCESS);
2921 		} else
2922 			if (blocked) {	/* blocked, but different cmpt? */
				/*
				 * Only the parent lock is attempted here.
				 * NOTE(review): pdip is passed to
				 * ndi_devi_tryenter() without the NULL test
				 * used a few lines below -- confirm pdip
				 * cannot be NULL on this path.
				 */
2923 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
2924 					cmn_err(CE_NOTE,
2925 					    "!pm: parent kuc not updated due "
2926 					    "to possible deadlock.\n");
2927 					return (pm_phc_impl(dip,
2928 						    comp, level, 1));
2929 				}
2930 				old_level = cur_power(cp);
				/* Same hold/rele accounting as the non-deadlock case */
2931 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2932 				    (!PM_ISBC(dip) || comp == 0) &&
2933 				    POWERING_ON(old_level, level))
2934 					pm_hold_power(pdip);
2935 				ret = pm_phc_impl(dip, comp, level, 1);
2936 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2937 					if ((!PM_ISBC(dip) ||
2938 					    comp == 0) && level == 0 &&
2939 					    old_level != PM_LEVEL_UNKNOWN)
2940 						pm_rele_power(pdip);
2941 				}
2942 				ndi_devi_exit(pdip, pcirc);
2943 				/* child lock not held: deadlock */
2944 				return (ret);
2945 			}
2946 		delay(1);
2947 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
2948 	}
2949 
2950 	/* non-deadlock case */
2951 	old_level = cur_power(cp);
	/*
	 * A parent that does not want notification is held before the child
	 * is recorded as powering on, and released after the child has been
	 * recorded at level 0 (unless its previous level was unknown).  For
	 * BC devices only component 0 takes part.
	 */
2952 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2953 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
2954 		pm_hold_power(pdip);
2955 	ret = pm_phc_impl(dip, comp, level, 1);
2956 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2957 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
2958 		    old_level != PM_LEVEL_UNKNOWN)
2959 			pm_rele_power(pdip);
2960 	}
2961 	PM_UNLOCK_POWER(dip, circ);
2962 	ndi_devi_exit(pdip, pcirc);
2963 	return (ret);
2964 }
2965 
2966 /*
2967  * Account for power changes to a component of the the console frame buffer.
2968  * If lowering power from full (or "unkown", which is treatd as full)
2969  * we will increment the "components off" count of the fb device.
2970  * Subsequent lowering of the same component doesn't affect the count.  If
2971  * raising a component back to full power, we will decrement the count.
2972  *
2973  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2974  */
2975 static int
2976 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2977 {
2978 	struct pm_component *cp = PM_CP(dip, cmpt);
2979 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2980 	int want_normal = (new == cp->pmc_norm_pwr);
2981 	int incr = 0;
2982 
2983 	if (on && !want_normal)
2984 		incr = 1;
2985 	else if (!on && want_normal)
2986 		incr = -1;
2987 	return (incr);
2988 }
2989 
2990 /*
2991  * Adjust the count of console frame buffer components < full power.
2992  */
2993 static void
2994 update_comps_off(int incr, dev_info_t *dip)
2995 {
2996 		mutex_enter(&pm_cfb_lock);
2997 		pm_cfb_comps_off += incr;
2998 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
2999 		mutex_exit(&pm_cfb_lock);
3000 }
3001 
3002 /*
3003  * Update the power state in the framework (via the ppm).  The 'notify'
3004  * argument tells whether to notify watchers.  Power lock is already held.
 *
 * Returns DDI_SUCCESS when the component is already recorded at 'level' or
 * once the new level has been recorded; returns DDI_FAILURE only when the
 * pm_ctlops() notification to the ppm fails, in which case nothing has been
 * recorded.
3005  */
3006 static int
3007 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3008 {
3009 	PMD_FUNC(pmf, "phc_impl")
3010 	power_req_t power_req;
3011 	int i, dodeps = 0;
3012 	dev_info_t *pdip = ddi_get_parent(dip);
3013 	int result;
3014 	int old_level;
3015 	struct pm_component *cp;
3016 	int incr = 0;
3017 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3018 	int work_type = 0;
3019 	char *pathbuf;
3020 
3021 	/* Must use "official" power level for this test. */
3022 	cp = PM_CP(dip, comp);
	/*
	 * While PM_PHC_WHILE_SET_POWER is set the stashed index pmc_phc_pwr
	 * is used instead of pmc_cur_pwr.  Both are indices into pmc_lvals,
	 * so a known index is converted to a level before comparing.
	 */
3023 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3024 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3025 	if (old_level != PM_LEVEL_UNKNOWN)
3026 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3027 
3028 	if (level == old_level) {
3029 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3030 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3031 		return (DDI_SUCCESS);
3032 	}
3033 
3034 	/*
3035 	 * Tell ppm about this.
3036 	 */
3037 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3038 	power_req.req.ppm_notify_level_req.who = dip;
3039 	power_req.req.ppm_notify_level_req.cmpt = comp;
3040 	power_req.req.ppm_notify_level_req.new_level = level;
3041 	power_req.req.ppm_notify_level_req.old_level = old_level;
3042 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3043 	    &result) == DDI_FAILURE) {
3044 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3045 		    pmf, PM_DEVICE(dip), level))
3046 		return (DDI_FAILURE);
3047 	}
3048 
	/* Console frame buffer: keep pm_cfb_comps_off in step with the change */
3049 	if (PM_IS_CFB(dip)) {
3050 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3051 
3052 		if (incr) {
3053 			update_comps_off(incr, dip);
3054 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3055 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3056 			    comp, old_level, level, pm_cfb_comps_off))
3057 		}
3058 	}
3059 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	/* This store to result is never read again in this function */
3060 	result = DDI_SUCCESS;
3061 
3062 	if (notify) {
3063 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3064 			pm_notify_parent(dip, pdip, comp, old_level, level);
3065 		(void) pm_check_and_resume(dip, comp, old_level, level);
3066 	}
3067 
3068 	/*
3069 	 * Decrement the dependency kidsup count if we turn a device
3070 	 * off.
3071 	 */
3072 	if (POWERING_OFF(old_level, level)) {
		/* Dependency work is queued only if every component is now off */
3073 		dodeps = 1;
3074 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3075 			cp = PM_CP(dip, i);
3076 			if (cur_power(cp)) {
3077 				dodeps = 0;
3078 				break;
3079 			}
3080 		}
3081 		if (dodeps)
3082 			work_type = PM_DEP_WK_POWER_OFF;
3083 	}
3084 
3085 	/*
3086 	 * Increment if we turn it on. Check to see
3087 	 * if other comps are already on, if so,
3088 	 * don't increment.
3089 	 */
3090 	if (POWERING_ON(old_level, level)) {
3091 		dodeps = 1;
3092 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3093 			cp = PM_CP(dip, i);
3094 			if (comp == i)
3095 				continue;
3096 			/* -1 also treated as 0 in this case */
3097 			if (cur_power(cp) > 0) {
3098 				dodeps = 0;
3099 				break;
3100 			}
3101 		}
3102 		if (dodeps)
3103 			work_type = PM_DEP_WK_POWER_ON;
3104 	}
3105 
3106 	if (dodeps) {
		/* The path buffer is freed right after the dispatch call returns */
3107 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3108 		(void) ddi_pathname(dip, pathbuf);
3109 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3110 		    PM_DEP_NOWAIT, NULL, 0);
3111 		kmem_free(pathbuf, MAXPATHLEN);
3112 	}
3113 
	/*
	 * level != old_level always holds here because the equal case
	 * returned early above.
	 */
3114 	if (notify && (level != old_level) && pm_watchers()) {
3115 		mutex_enter(&pm_rsvp_lock);
3116 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3117 		    PM_CANBLOCK_BLOCK);
3118 		mutex_exit(&pm_rsvp_lock);
3119 	}
3120 
3121 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3122 	pm_rescan(dip);
3123 	return (DDI_SUCCESS);
3124 }
3125 
3126 /*
3127  * This function is called at startup time to notify pm of the existence
3128  * of any platform power managers for this platform.  As a result of
3129  * this registration, each function provided will be called each time
3130  * a device node is attached, until one returns true, and it must claim the
3131  * device node (by returning non-zero) if it wants to be involved in the
3132  * node's power management.  If it does claim the node, then it will
3133  * subsequently be notified of attach and detach events.
3134  *
3135  */
3136 
3137 int
3138 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3139 {
3140 	PMD_FUNC(pmf, "register_ppm")
3141 	struct ppm_callbacks *ppmcp;
3142 	pm_component_t *cp;
3143 	int i, pwr, result, circ;
3144 	power_req_t power_req;
3145 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3146 	void pm_ppm_claim(dev_info_t *);
3147 
3148 	mutex_enter(&ppm_lock);
3149 	ppmcp = ppm_callbacks;
3150 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3151 		if (ppmcp->ppmc_func == NULL) {
3152 			ppmcp->ppmc_func = func;
3153 			ppmcp->ppmc_dip = dip;
3154 			break;
3155 		}
3156 	}
3157 	mutex_exit(&ppm_lock);
3158 
3159 	if (i >= MAX_PPM_HANDLERS)
3160 		return (DDI_FAILURE);
3161 	while ((dip = ddi_get_parent(dip)) != NULL) {
3162 		if (PM_GET_PM_INFO(dip) == NULL)
3163 			continue;
3164 		pm_ppm_claim(dip);
3165 		if (pm_ppm_claimed(dip)) {
3166 			/*
3167 			 * Tell ppm about this.
3168 			 */
3169 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3170 			p->old_level = PM_LEVEL_UNKNOWN;
3171 			p->who = dip;
3172 			PM_LOCK_POWER(dip, &circ);
3173 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3174 				cp = PM_CP(dip, i);
3175 				pwr = cp->pmc_cur_pwr;
3176 				if (pwr != PM_LEVEL_UNKNOWN) {
3177 					p->cmpt = i;
3178 					p->new_level = cur_power(cp);
3179 					p->old_level = PM_LEVEL_UNKNOWN;
3180 					if (pm_ctlops(PPM(dip), dip,
3181 					    DDI_CTLOPS_POWER, &power_req,
3182 					    &result) == DDI_FAILURE) {
3183 						PMD(PMD_FAIL, ("%s: pc "
3184 						    "%s@%s(%s#%d) to %d "
3185 						    "fails\n", pmf,
3186 						    PM_DEVICE(dip), pwr))
3187 					}
3188 				}
3189 			}
3190 			PM_UNLOCK_POWER(dip, circ);
3191 		}
3192 	}
3193 	return (DDI_SUCCESS);
3194 }
3195 
3196 /*
3197  * Call the ppm's that have registered and adjust the devinfo struct as
3198  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3199  * by each ppm are assumed to be disjoint.
3200  */
3201 void
3202 pm_ppm_claim(dev_info_t *dip)
3203 {
3204 	struct ppm_callbacks *ppmcp;
3205 
3206 	if (PPM(dip)) {
3207 		return;
3208 	}
3209 	mutex_enter(&ppm_lock);
3210 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3211 		if ((*ppmcp->ppmc_func)(dip)) {
3212 			DEVI(dip)->devi_pm_ppm =
3213 			    (struct dev_info *)ppmcp->ppmc_dip;
3214 			mutex_exit(&ppm_lock);
3215 			return;
3216 		}
3217 	}
3218 	mutex_exit(&ppm_lock);
3219 }
3220 
3221 /*
3222  * Node is being detached so stop autopm until we see if it succeeds, in which
3223  * case pm_stop will be called.  For backwards compatible devices we bring the
3224  * device up to full power on the assumption the detach will succeed.
3225  */
3226 void
3227 pm_detaching(dev_info_t *dip)
3228 {
3229 	PMD_FUNC(pmf, "detaching")
3230 	pm_info_t *info = PM_GET_PM_INFO(dip);
3231 	int iscons;
3232 
3233 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3234 	    PM_NUMCMPTS(dip)))
3235 	if (info == NULL)
3236 		return;
3237 	ASSERT(DEVI_IS_DETACHING(dip));
3238 	PM_LOCK_DIP(dip);
3239 	info->pmi_dev_pm_state |= PM_DETACHING;
3240 	PM_UNLOCK_DIP(dip);
3241 	if (!PM_ISBC(dip))
3242 		pm_scan_stop(dip);
3243 
3244 	/*
3245 	 * console and old-style devices get brought up when detaching.
3246 	 */
3247 	iscons = PM_IS_CFB(dip);
3248 	if (iscons || PM_ISBC(dip)) {
3249 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3250 		if (iscons) {
3251 			mutex_enter(&pm_cfb_lock);
3252 			while (cfb_inuse) {
3253 				mutex_exit(&pm_cfb_lock);
3254 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3255 				delay(1);
3256 				mutex_enter(&pm_cfb_lock);
3257 			}
3258 			ASSERT(cfb_dip_detaching == NULL);
3259 			ASSERT(cfb_dip);
3260 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3261 			cfb_dip = NULL;
3262 			mutex_exit(&pm_cfb_lock);
3263 		}
3264 	}
3265 }
3266 
3267 /*
3268  * Node failed to detach.  If it used to be autopm'd, make it so again.
3269  */
3270 void
3271 pm_detach_failed(dev_info_t *dip)
3272 {
3273 	PMD_FUNC(pmf, "detach_failed")
3274 	pm_info_t *info = PM_GET_PM_INFO(dip);
3275 	int pm_all_at_normal(dev_info_t *);
3276 
3277 	if (info == NULL)
3278 		return;
3279 	ASSERT(DEVI_IS_DETACHING(dip));
3280 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3281 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3282 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3283 			/* Make sure the operation is still needed */
3284 			if (!pm_all_at_normal(dip)) {
3285 				if (pm_all_to_normal(dip,
3286 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3287 					PMD(PMD_ERROR, ("%s: could not bring "
3288 					    "%s@%s(%s#%d) to normal\n", pmf,
3289 					    PM_DEVICE(dip)))
3290 				}
3291 			}
3292 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3293 		}
3294 	}
3295 	if (!PM_ISBC(dip)) {
3296 		mutex_enter(&pm_scan_lock);
3297 		if (PM_SCANABLE(dip))
3298 			pm_scan_init(dip);
3299 		mutex_exit(&pm_scan_lock);
3300 		pm_rescan(dip);
3301 	}
3302 }
3303 
3304 /* generic Backwards Compatible component */
/* Level names shared by every backwards compatible component. */
3305 static char *bc_names[] = {"off", "on"};
3306 
/*
 * Template that e_pm_default_components() copies into each component of a
 * backwards compatible device by struct assignment.  The two NULL members
 * are presumably the level-value and threshold arrays, which that function
 * allocates per component right after the copy -- confirm against the
 * pm_comp_t declaration.
 */
3307 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3308 
3309 static void
3310 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3311 {
3312 	pm_comp_t *pmc;
3313 	pmc = &cp->pmc_comp;
3314 	pmc->pmc_numlevels = 2;
3315 	pmc->pmc_lvals[0] = 0;
3316 	pmc->pmc_lvals[1] = norm;
3317 	e_pm_set_cur_pwr(dip, cp, norm);
3318 }
3319 
3320 static void
3321 e_pm_default_components(dev_info_t *dip, int cmpts)
3322 {
3323 	int i;
3324 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3325 
3326 	p = DEVI(dip)->devi_pm_components;
3327 	for (i = 0; i < cmpts; i++, p++) {
3328 		p->pmc_comp = bc_comp;	/* struct assignment */
3329 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3330 		    KM_SLEEP);
3331 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3332 		    KM_SLEEP);
3333 		p->pmc_comp.pmc_numlevels = 2;
3334 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3335 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3336 	}
3337 }
3338 
3339 /*
3340  * Called from functions that require components to exist already to allow
3341  * for their creation by parsing the pm-components property.
3342  * Device will not be power managed as a result of this call
3343  * No locking needed because we're single threaded by the ndi_devi_enter
3344  * done while attaching, and the device isn't visible until after it has
3345  * attached
 *
 * style is compared against PM_STYLE_NEW: with that style a missing
 * pm-components property is a failure rather than a fall back to backwards
 * compatible (BC) handling.
 *
 * Returns DDI_SUCCESS when components were set up now or on an earlier call
 * (PMC_COMPONENTS_DONE).  Returns DDI_FAILURE when an earlier call failed
 * (PMC_COMPONENTS_FAILED), when the property is malformed or required but
 * absent, when a BC device has no components, or when a BC component has no
 * normal power set.
3346  */
3347 int
3348 pm_premanage(dev_info_t *dip, int style)
3349 {
3350 	PMD_FUNC(pmf, "premanage")
3351 	pm_comp_t	*pcp, *compp;
3352 	int		cmpts, i, norm, error;
	/* p is reloaded before its only use, in the pm-components branch */
3353 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3354 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3355 
3356 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3357 	/*
3358 	 * If this dip has already been processed, don't mess with it
3359 	 */
3360 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3361 		return (DDI_SUCCESS);
3362 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3363 		return (DDI_FAILURE);
3364 	}
3365 	/*
3366 	 * Look up pm-components property and create components accordingly
3367 	 * If that fails, fall back to backwards compatibility
3368 	 */
3369 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3370 		/*
3371 		 * If error is set, the property existed but was not well formed
3372 		 */
3373 		if (error || (style == PM_STYLE_NEW)) {
3374 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3375 			return (DDI_FAILURE);
3376 		}
3377 		/*
3378 		 * If they don't have the pm-components property, then we
3379 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3380 		 * behavior driver must have called pm_create_components, and
3381 		 * we need to flesh out dummy components
3382 		 */
3383 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3384 			/*
3385 			 * Not really failure, but we don't want the
3386 			 * caller to treat it as success
3387 			 */
3388 			return (DDI_FAILURE);
3389 		}
3390 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3391 		e_pm_default_components(dip, cmpts);
3392 		for (i = 0; i < cmpts; i++) {
3393 			/*
3394 			 * if normal power not set yet, we don't really know
3395 			 * what *ANY* of the power values are.  If normal
3396 			 * power is set, then we assume for this backwards
3397 			 * compatible case that the values are 0, normal power.
3398 			 */
3399 			norm = pm_get_normal_power(dip, i);
			/*
			 * norm is an int compared against (uint_t)-1, the
			 * value e_pm_create_components() stores in
			 * pmc_norm_pwr for "not set".
			 * NOTE(review): this early return leaves PMC_BC set,
			 * the default components allocated and earlier
			 * components counted by PM_INCR_NOTLOWEST -- confirm
			 * that callers clean this up.
			 */
3400 			if (norm == (uint_t)-1) {
3401 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3402 				    PM_DEVICE(dip), i))
3403 				return (DDI_FAILURE);
3404 			}
3405 			/*
3406 			 * Components of BC devices start at their normal power,
3407 			 * so count them to be not at their lowest power.
3408 			 */
3409 			PM_INCR_NOTLOWEST(dip);
3410 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3411 		}
3412 	} else {
3413 		/*
3414 		 * e_pm_create_components was called from pm_autoconfig(), it
3415 		 * creates components with no descriptions (or known levels)
3416 		 */
3417 		cmpts = PM_NUMCMPTS(dip);
3418 		ASSERT(cmpts != 0);
3419 		pcp = compp;
3420 		p = DEVI(dip)->devi_pm_components;
		/* Copy each parsed description in; current power starts unknown */
3421 		for (i = 0; i < cmpts; i++, p++) {
3422 			p->pmc_comp = *pcp++;   /* struct assignment */
3423 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3424 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3425 		}
3426 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3427 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3428 			    PMC_CPU_THRESH);
3429 		else
3430 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3431 			    PMC_DEF_THRESH);
		/* Only the array returned by pm_autoconfig() is freed here */
3432 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3433 	}
3434 	return (DDI_SUCCESS);
3435 }
3436 
3437 /*
3438  * Called from during or after the device's attach to let us know it is ready
3439  * to play autopm.   Look up the pm model and manage the device accordingly.
3440  * Returns system call errno value.
3441  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3442  * a little cleaner
3443  *
3444  * Called with dip lock held, return with dip lock unheld.
3445  */
3446 
3447 int
3448 e_pm_manage(dev_info_t *dip, int style)
3449 {
3450 	PMD_FUNC(pmf, "e_manage")
3451 	pm_info_t	*info;
3452 	dev_info_t	*pdip = ddi_get_parent(dip);
3453 	int	pm_thresh_specd(dev_info_t *);
3454 	int	count;
3455 	char	*pathbuf;
3456 
3457 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3458 		return (DDI_FAILURE);
3459 	}
3460 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3461 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3462 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3463 
3464 	/*
3465 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3466 	 * out at their normal power, so they are "up", others start out
3467 	 * unknown, which is effectively "up".  Parent which want notification
3468 	 * get kidsupcnt of 0 always.
3469 	 */
3470 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3471 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3472 		e_pm_hold_rele_power(pdip, count);
3473 
3474 	pm_set_pm_info(dip, info);
3475 	/*
3476 	 * Apply any recorded thresholds
3477 	 */
3478 	(void) pm_thresh_specd(dip);
3479 
3480 	/*
3481 	 * Do dependency processing.
3482 	 */
3483 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3484 	(void) ddi_pathname(dip, pathbuf);
3485 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3486 	    PM_DEP_NOWAIT, NULL, 0);
3487 	kmem_free(pathbuf, MAXPATHLEN);
3488 
3489 	if (!PM_ISBC(dip)) {
3490 		mutex_enter(&pm_scan_lock);
3491 		if (PM_SCANABLE(dip)) {
3492 			pm_scan_init(dip);
3493 			mutex_exit(&pm_scan_lock);
3494 			pm_rescan(dip);
3495 		} else {
3496 			mutex_exit(&pm_scan_lock);
3497 		}
3498 	}
3499 	return (0);
3500 }
3501 
3502 /*
3503  * This is the obsolete exported interface for a driver to find out its
3504  * "normal" (max) power.
3505  * We only get components destroyed while no power management is
3506  * going on (and the device is detached), so we don't need a mutex here
3507  */
3508 int
3509 pm_get_normal_power(dev_info_t *dip, int comp)
3510 {
3511 
3512 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3513 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3514 	}
3515 	return (DDI_FAILURE);
3516 }
3517 
3518 /*
3519  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3520  */
3521 int
3522 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3523 {
3524 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3525 		*levelp = PM_CURPOWER(dip, comp);
3526 		return (DDI_SUCCESS);
3527 	}
3528 	return (DDI_FAILURE);
3529 }
3530 
3531 /*
3532  * Returns current threshold of indicated component
3533  */
3534 static int
3535 cur_threshold(dev_info_t *dip, int comp)
3536 {
3537 	pm_component_t *cp = PM_CP(dip, comp);
3538 	int pwr;
3539 
3540 	if (PM_ISBC(dip)) {
3541 		/*
3542 		 * backwards compatible nodes only have one threshold
3543 		 */
3544 		return (cp->pmc_comp.pmc_thresh[1]);
3545 	}
3546 	pwr = cp->pmc_cur_pwr;
3547 	if (pwr == PM_LEVEL_UNKNOWN) {
3548 		int thresh;
3549 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3550 			thresh = pm_default_nexus_threshold;
3551 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3552 			thresh = pm_cpu_idle_threshold;
3553 		else
3554 			thresh = pm_system_idle_threshold;
3555 		return (thresh);
3556 	}
3557 	ASSERT(cp->pmc_comp.pmc_thresh);
3558 	return (cp->pmc_comp.pmc_thresh[pwr]);
3559 }
3560 
3561 /*
3562  * Compute next lower component power level given power index.
3563  */
3564 static int
3565 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3566 {
3567 	int nxt_pwr;
3568 
3569 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3570 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3571 	} else {
3572 		pwrndx--;
3573 		ASSERT(pwrndx >= 0);
3574 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3575 	}
3576 	return (nxt_pwr);
3577 }
3578 
3579 /*
3580  * Bring all components of device to normal power
3581  */
3582 int
3583 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3584 {
3585 	PMD_FUNC(pmf, "all_to_normal")
3586 	int		*normal;
3587 	int		i, ncomps, result;
3588 	size_t		size;
3589 	int		changefailed = 0;
3590 
3591 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3592 	ASSERT(PM_GET_PM_INFO(dip));
3593 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3594 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3595 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3596 		return (DDI_FAILURE);
3597 	}
3598 	ncomps = PM_NUMCMPTS(dip);
3599 	for (i = 0; i < ncomps; i++) {
3600 		if (pm_set_power(dip, i, normal[i],
3601 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3602 			changefailed++;
3603 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3604 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3605 			    PM_DEVICE(dip), i, normal[i], result))
3606 		}
3607 	}
3608 	kmem_free(normal, size);
3609 	if (changefailed) {
3610 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3611 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3612 		return (DDI_FAILURE);
3613 	}
3614 	return (DDI_SUCCESS);
3615 }
3616 
3617 /*
3618  * Returns true if all components of device are at normal power
3619  */
3620 int
3621 pm_all_at_normal(dev_info_t *dip)
3622 {
3623 	PMD_FUNC(pmf, "all_at_normal")
3624 	int		*normal;
3625 	int		i;
3626 	size_t		size;
3627 
3628 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3629 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3630 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3631 		return (DDI_FAILURE);
3632 	}
3633 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3634 		int current = PM_CURPOWER(dip, i);
3635 		if (normal[i] > current) {
3636 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3637 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3638 			    normal[i], current))
3639 			break;
3640 		}
3641 	}
3642 	kmem_free(normal, size);
3643 	if (i != PM_NUMCMPTS(dip)) {
3644 		return (0);
3645 	}
3646 	return (1);
3647 }
3648 
3649 static void
3650 bring_wekeeps_up(char *keeper)
3651 {
3652 	PMD_FUNC(pmf, "bring_wekeeps_up")
3653 	int i;
3654 	pm_pdr_t *dp;
3655 	pm_info_t *wku_info;
3656 	char *kept_path;
3657 	dev_info_t *kept;
3658 	static void bring_pmdep_up(dev_info_t *, int);
3659 
3660 	if (panicstr) {
3661 		return;
3662 	}
3663 	/*
3664 	 * We process the request even if the keeper detaches because
3665 	 * detach processing expects this to increment kidsupcnt of kept.
3666 	 */
3667 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3668 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3669 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3670 			continue;
3671 		for (i = 0; i < dp->pdr_kept_count; i++) {
3672 			kept_path = dp->pdr_kept_paths[i];
3673 			if (kept_path == NULL)
3674 				continue;
3675 			ASSERT(kept_path[0] != '\0');
3676 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3677 				continue;
3678 			wku_info = PM_GET_PM_INFO(kept);
3679 			if (wku_info == NULL) {
3680 				if (kept)
3681 					ddi_release_devi(kept);
3682 				continue;
3683 			}
3684 			/*
3685 			 * Don't mess with it if it is being detached, it isn't
3686 			 * safe to call its power entry point
3687 			 */
3688 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3689 				if (kept)
3690 					ddi_release_devi(kept);
3691 				continue;
3692 			}
3693 			bring_pmdep_up(kept, 1);
3694 			ddi_release_devi(kept);
3695 		}
3696 	}
3697 }
3698 
3699 /*
3700  * Bring up the 'kept' device passed as argument
3701  */
3702 static void
3703 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3704 {
3705 	PMD_FUNC(pmf, "bring_pmdep_up")
3706 	int is_all_at_normal = 0;
3707 
3708 	/*
3709 	 * If the kept device has been unmanaged, do nothing.
3710 	 */
3711 	if (!PM_GET_PM_INFO(kept_dip))
3712 		return;
3713 
3714 	/* Just ignore DIRECT PM device till they are released. */
3715 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3716 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3717 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3718 		    "controlling process did something else\n", pmf,
3719 		    PM_DEVICE(kept_dip)))
3720 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3721 		return;
3722 	}
3723 	/* if we got here the keeper had a transition from OFF->ON */
3724 	if (hold)
3725 		pm_hold_power(kept_dip);
3726 
3727 	if (!is_all_at_normal)
3728 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3729 }
3730 
3731 /*
3732  * A bunch of stuff that belongs only to the next routine (or two)
3733  */
3734 
/* Literal "NAME=" and its length without the terminating NUL. */
3735 static const char namestr[] = "NAME=";
3736 static const int nameln = sizeof (namestr) - 1;
/* Literal "pm-components". */
3737 static const char pmcompstr[] = "pm-components";
3738 
/* Singly linked list node carrying one pm_comp_t pointer. */
3739 struct pm_comp_pkg {
3740 	pm_comp_t		*comp;
3741 	struct pm_comp_pkg	*next;
3742 };
3743 
/*
 * ASCII-only character class tests.  Both macros evaluate their argument
 * more than once, so do not pass an expression with side effects.
 */
3744 #define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3745 
3746 #define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
3747 			((ch) >= 'A' && (ch) <= 'F'))
3748 
3749 /*
3750  * Rather than duplicate this code ...
3751  * (this code excerpted from the function that follows it)
 *
 * The macro is not self-contained: it expands to a block that uses the
 * names compp, size, tp, level, j, lvals, lnames and lszs from the scope it
 * is expanded in (presumably pm_autoconfig()).
 *
 * It finishes the component compp points at: records size as the length of
 * the level-name buffer and level as the number of levels, allocates the
 * name buffer plus the per-level name-pointer, value and threshold arrays,
 * then for each level sets the threshold to INT_MAX, copies the level value
 * from lvals[] and copies the level name from lnames[] into the buffer,
 * advancing by lszs[j] after each name.  The final ASSERT checks that the
 * write pointer moved and stayed within the buffer.
3752  */
3753 #define	FINISH_COMP { \
3754 	ASSERT(compp); \
3755 	compp->pmc_lnames_sz = size; \
3756 	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
3757 	compp->pmc_numlevels = level; \
3758 	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
3759 	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3760 	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3761 	/* copy string out of prop array into buffer */ \
3762 	for (j = 0; j < level; j++) { \
3763 		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
3764 		compp->pmc_lvals[j] = lvals[j]; \
3765 		(void) strcpy(tp, lnames[j]); \
3766 		compp->pmc_lnames[j] = tp; \
3767 		tp += lszs[j]; \
3768 	} \
3769 	ASSERT(tp > compp->pmc_lname_buf && tp <= \
3770 	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
3771 	}
3772 
3773 /*
3774  * Create (empty) component data structures.
3775  */
3776 static void
3777 e_pm_create_components(dev_info_t *dip, int num_components)
3778 {
3779 	struct pm_component *compp, *ocompp;
3780 	int i, size = 0;
3781 
3782 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3783 	ASSERT(!DEVI(dip)->devi_pm_components);
3784 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3785 	size = sizeof (struct pm_component) * num_components;
3786 
3787 	compp = kmem_zalloc(size, KM_SLEEP);
3788 	ocompp = compp;
3789 	DEVI(dip)->devi_pm_comp_size = size;
3790 	DEVI(dip)->devi_pm_num_components = num_components;
3791 	PM_LOCK_BUSY(dip);
3792 	for (i = 0; i < num_components;  i++) {
3793 		compp->pmc_timestamp = gethrestime_sec();
3794 		compp->pmc_norm_pwr = (uint_t)-1;
3795 		compp++;
3796 	}
3797 	PM_UNLOCK_BUSY(dip);
3798 	DEVI(dip)->devi_pm_components = ocompp;
3799 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3800 }
3801 
/*
 * Parse hex or decimal value from char string
 *
 * cp points at the text to parse.  The text must start with a decimal
 * digit.  A leading "0x" or "0X" selects hexadecimal; anything else is read
 * as decimal (a leading '0' does not mean octal).  Parsing stops at the
 * first character that is not a digit of the selected base.
 *
 * On success the value is stored in *valp and a pointer to the first
 * unconsumed character is returned.  "0x" with no hex digits after it yields
 * 0 and a pointer just past the 'x'.  If the first character is not a
 * decimal digit, NULL is returned and *valp is left untouched.
 *
 * The value is accumulated directly, so the number of digits is not limited
 * by any scratch buffer (the previous implementation copied the digits into
 * a fixed 256-byte stack buffer without a bound check).  Accumulation is
 * done in unsigned arithmetic so that an over-long number wraps instead of
 * overflowing a signed int.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	unsigned int base = 10;
	int ch;

	ch = *cp;
	if (ch < '0' || ch > '9')
		return (NULL);

	/* "0x" / "0X" prefix switches to hex; the prefix itself is skipped */
	if (ch == '0' && (cp[1] == 'x' || cp[1] == 'X')) {
		base = 16;
		cp += 2;
	}

	for (;;) {
		unsigned int digit;

		ch = *cp;
		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;
		value = value * base + digit;
		cp++;
	}

	*valp = (int)value;
	return (cp);
}
3864 
3865 /*
3866  * Set max (previously documented as "normal") power.
3867  */
3868 static void
3869 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3870 {
3871 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3872 }
3873 
3874 /*
3875  * Internal routine for destroying components
3876  * It is called even when there might not be any, so it must be forgiving.
3877  */
3878 static void
3879 e_pm_destroy_components(dev_info_t *dip)
3880 {
3881 	int i;
3882 	struct pm_component *cp;
3883 
3884 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3885 	if (PM_NUMCMPTS(dip) == 0)
3886 		return;
3887 	cp = DEVI(dip)->devi_pm_components;
3888 	ASSERT(cp);
3889 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3890 		int nlevels = cp->pmc_comp.pmc_numlevels;
3891 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3892 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3893 		/*
3894 		 * For BC nodes, the rest is static in bc_comp, so skip it
3895 		 */
3896 		if (PM_ISBC(dip))
3897 			continue;
3898 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3899 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3900 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3901 				cp->pmc_comp.pmc_lnames_sz);
3902 	}
3903 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3904 	DEVI(dip)->devi_pm_components = NULL;
3905 	DEVI(dip)->devi_pm_num_components = 0;
3906 	DEVI(dip)->devi_pm_flags &=
3907 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3908 }
3909 
3910 /*
3911  * Read the pm-components property (if there is one) and use it to set up
3912  * components.  Returns a pointer to an array of component structures if
3913  * pm-components found and successfully parsed, else returns NULL.
3914  * Sets error return *errp to true to indicate a failure (as opposed to no
3915  * property being present).
3916  */
3917 pm_comp_t *
3918 pm_autoconfig(dev_info_t *dip, int *errp)
3919 {
3920 	PMD_FUNC(pmf, "autoconfig")
3921 	uint_t nelems;
3922 	char **pp;
3923 	pm_comp_t *compp = NULL;
3924 	int i, j, level, components = 0;
3925 	size_t size = 0;
3926 	struct pm_comp_pkg *p, *ptail;
3927 	struct pm_comp_pkg *phead = NULL;
3928 	int *lvals = NULL;
3929 	int *lszs = NULL;
3930 	int *np = NULL;
3931 	int npi = 0;
3932 	char **lnames = NULL;
3933 	char *cp, *tp;
3934 	pm_comp_t *ret = NULL;
3935 
3936 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3937 	*errp = 0;	/* assume success */
3938 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3939 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3940 		return (NULL);
3941 	}
3942 
3943 	if (nelems < 3) {	/* need at least one name and two levels */
3944 		goto errout;
3945 	}
3946 
3947 	/*
3948 	 * pm_create_components is no longer allowed
3949 	 */
3950 	if (PM_NUMCMPTS(dip) != 0) {
3951 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3952 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3953 		goto errout;
3954 	}
3955 
3956 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3957 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3958 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
3959 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3960 
3961 	level = 0;
3962 	phead = NULL;
3963 	for (i = 0; i < nelems; i++) {
3964 		cp = pp[i];
3965 		if (!isdigit(*cp)) {	/*  must be name */
3966 			if (strncmp(cp, namestr, nameln) != 0) {
3967 				goto errout;
3968 			}
3969 			if (i != 0) {
3970 				if (level == 0) {	/* no level spec'd */
3971 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
3972 					    pmf))
3973 					goto errout;
3974 				}
3975 				np[npi++] = lvals[level - 1];
3976 				/* finish up previous component levels */
3977 				FINISH_COMP;
3978 			}
3979 			cp += nameln;
3980 			if (!*cp) {
3981 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
3982 				goto errout;
3983 			}
3984 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
3985 			if (phead == NULL) {
3986 				phead = ptail = p;
3987 			} else {
3988 				ptail->next = p;
3989 				ptail = p;
3990 			}
3991 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
3992 			    KM_SLEEP);
3993 			compp->pmc_name_sz = strlen(cp) + 1;
3994 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
3995 			    KM_SLEEP);
3996 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
3997 			components++;
3998 			level = 0;
3999 		} else {	/* better be power level <num>=<name> */
4000 #ifdef DEBUG
4001 			tp = cp;
4002 #endif
4003 			if (i == 0 ||
4004 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4005 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4006 				goto errout;
4007 			}
4008 #ifdef DEBUG
4009 			tp = cp;
4010 #endif
4011 			if (*cp++ != '=' || !*cp) {
4012 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4013 				goto errout;
4014 			}
4015 
4016 			lszs[level] = strlen(cp) + 1;
4017 			size += lszs[level];
4018 			lnames[level] = cp;	/* points into prop string */
4019 			level++;
4020 		}
4021 	}
4022 	np[npi++] = lvals[level - 1];
4023 	if (level == 0) {	/* ended with a name */
4024 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4025 		goto errout;
4026 	}
4027 	FINISH_COMP;
4028 
4029 
4030 	/*
4031 	 * Now we have a list of components--we have to return instead an
4032 	 * array of them, but we can just copy the top level and leave
4033 	 * the rest as is
4034 	 */
4035 	(void) e_pm_create_components(dip, components);
4036 	for (i = 0; i < components; i++)
4037 		e_pm_set_max_power(dip, i, np[i]);
4038 
4039 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4040 	for (i = 0, p = phead; i < components; i++) {
4041 		ASSERT(p);
4042 		/*
4043 		 * Now sanity-check values:  levels must be monotonically
4044 		 * increasing
4045 		 */
4046 		if (p->comp->pmc_numlevels < 2) {
4047 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4048 			    "levels\n", pmf,
4049 			    p->comp->pmc_name, PM_DEVICE(dip),
4050 			    p->comp->pmc_numlevels))
4051 			goto errout;
4052 		}
4053 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4054 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4055 			    (p->comp->pmc_lvals[j] <=
4056 			    p->comp->pmc_lvals[j - 1]))) {
4057 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4058 				    "not mono. incr, %d follows %d\n", pmf,
4059 				    p->comp->pmc_name, PM_DEVICE(dip),
4060 				    p->comp->pmc_lvals[j],
4061 				    p->comp->pmc_lvals[j - 1]))
4062 				goto errout;
4063 			}
4064 		}
4065 		ret[i] = *p->comp;	/* struct assignment */
4066 		for (j = 0; j < i; j++) {
4067 			/*
4068 			 * Test for unique component names
4069 			 */
4070 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4071 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4072 				    "unique\n", pmf, ret[j].pmc_name,
4073 				    PM_DEVICE(dip)))
4074 				goto errout;
4075 			}
4076 		}
4077 		ptail = p;
4078 		p = p->next;
4079 		phead = p;	/* errout depends on phead making sense */
4080 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4081 		kmem_free(ptail, sizeof (*ptail));
4082 	}
4083 out:
4084 	ddi_prop_free(pp);
4085 	if (lvals)
4086 		kmem_free(lvals, nelems * sizeof (int));
4087 	if (lszs)
4088 		kmem_free(lszs, nelems * sizeof (int));
4089 	if (lnames)
4090 		kmem_free(lnames, nelems * sizeof (char *));
4091 	if (np)
4092 		kmem_free(np, nelems * sizeof (int));
4093 	return (ret);
4094 
4095 errout:
4096 	e_pm_destroy_components(dip);
4097 	*errp = 1;	/* signal failure */
4098 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4099 	for (i = 0; i < nelems - 1; i++)
4100 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4101 	if (nelems != 0)
4102 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4103 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4104 	for (p = phead; p; ) {
4105 		pm_comp_t *pp;
4106 		int n;
4107 
4108 		ptail = p;
4109 		/*
4110 		 * Free component data structures
4111 		 */
4112 		pp = p->comp;
4113 		n = pp->pmc_numlevels;
4114 		if (pp->pmc_name_sz) {
4115 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4116 		}
4117 		if (pp->pmc_lnames_sz) {
4118 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4119 		}
4120 		if (pp->pmc_lnames) {
4121 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4122 		}
4123 		if (pp->pmc_thresh) {
4124 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4125 		}
4126 		if (pp->pmc_lvals) {
4127 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4128 		}
4129 		p = ptail->next;
4130 		kmem_free(ptail, sizeof (*ptail));
4131 	}
4132 	if (ret != NULL)
4133 		kmem_free(ret, components * sizeof (pm_comp_t));
4134 	ret = NULL;
4135 	goto out;
4136 }
4137 
4138 /*
4139  * Set threshold values for a devices components by dividing the target
4140  * threshold (base) by the number of transitions and assign each transition
4141  * that threshold.  This will get the entire device down in the target time if
4142  * all components are idle and even if there are dependencies among components.
4143  *
4144  * Devices may well get powered all the way down before the target time, but
4145  * at least the EPA will be happy.
4146  */
4147 void
4148 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4149 {
4150 	PMD_FUNC(pmf, "set_device_threshold")
4151 	int target_threshold = (base * 95) / 100;
4152 	int level, comp;		/* loop counters */
4153 	int transitions = 0;
4154 	int ncomp = PM_NUMCMPTS(dip);
4155 	int thresh;
4156 	int remainder;
4157 	pm_comp_t *pmc;
4158 	int i, circ;
4159 
4160 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4161 	PM_LOCK_DIP(dip);
4162 	/*
4163 	 * First we handle the easy one.  If we're setting the default
4164 	 * threshold for a node with children, then we set it to the
4165 	 * default nexus threshold (currently 0) and mark it as default
4166 	 * nexus threshold instead
4167 	 */
4168 	if (PM_IS_NEXUS(dip)) {
4169 		if (flag == PMC_DEF_THRESH) {
4170 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4171 			    PM_DEVICE(dip)))
4172 			thresh = pm_default_nexus_threshold;
4173 			for (comp = 0; comp < ncomp; comp++) {
4174 				pmc = &PM_CP(dip, comp)->pmc_comp;
4175 				for (level = 1; level < pmc->pmc_numlevels;
4176 				    level++) {
4177 					pmc->pmc_thresh[level] = thresh;
4178 				}
4179 			}
4180 			DEVI(dip)->devi_pm_dev_thresh =
4181 			    pm_default_nexus_threshold;
4182 			/*
4183 			 * If the nexus node is being reconfigured back to
4184 			 * the default threshold, adjust the notlowest count.
4185 			 */
4186 			if (DEVI(dip)->devi_pm_flags &
4187 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4188 				PM_LOCK_POWER(dip, &circ);
4189 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4190 					if (PM_CURPOWER(dip, i) == 0)
4191 						continue;
4192 					mutex_enter(&pm_compcnt_lock);
4193 					ASSERT(pm_comps_notlowest);
4194 					pm_comps_notlowest--;
4195 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4196 					    "notlowest to %d\n", pmf,
4197 					    PM_DEVICE(dip), pm_comps_notlowest))
4198 					if (pm_comps_notlowest == 0)
4199 						pm_ppm_notify_all_lowest(dip,
4200 						    PM_ALL_LOWEST);
4201 					mutex_exit(&pm_compcnt_lock);
4202 				}
4203 				PM_UNLOCK_POWER(dip, circ);
4204 			}
4205 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4206 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4207 			PM_UNLOCK_DIP(dip);
4208 			return;
4209 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4210 			/*
4211 			 * If the nexus node is being configured for a
4212 			 * non-default threshold, include that node in
4213 			 * the notlowest accounting.
4214 			 */
4215 			PM_LOCK_POWER(dip, &circ);
4216 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4217 				if (PM_CURPOWER(dip, i) == 0)
4218 					continue;
4219 				mutex_enter(&pm_compcnt_lock);
4220 				if (pm_comps_notlowest == 0)
4221 					pm_ppm_notify_all_lowest(dip,
4222 					    PM_NOT_ALL_LOWEST);
4223 				pm_comps_notlowest++;
4224 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4225 				    "notlowest to %d\n", pmf,
4226 				    PM_DEVICE(dip), pm_comps_notlowest))
4227 				mutex_exit(&pm_compcnt_lock);
4228 			}
4229 			PM_UNLOCK_POWER(dip, circ);
4230 		}
4231 	}
4232 	/*
4233 	 * Compute the total number of transitions for all components
4234 	 * of the device.  Distribute the threshold evenly over them
4235 	 */
4236 	for (comp = 0; comp < ncomp; comp++) {
4237 		pmc = &PM_CP(dip, comp)->pmc_comp;
4238 		ASSERT(pmc->pmc_numlevels > 1);
4239 		transitions += pmc->pmc_numlevels - 1;
4240 	}
4241 	ASSERT(transitions);
4242 	thresh = target_threshold / transitions;
4243 
4244 	for (comp = 0; comp < ncomp; comp++) {
4245 		pmc = &PM_CP(dip, comp)->pmc_comp;
4246 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4247 			pmc->pmc_thresh[level] = thresh;
4248 		}
4249 	}
4250 
4251 #ifdef DEBUG
4252 	for (comp = 0; comp < ncomp; comp++) {
4253 		pmc = &PM_CP(dip, comp)->pmc_comp;
4254 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4255 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4256 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4257 			    comp, level, pmc->pmc_thresh[level]))
4258 		}
4259 	}
4260 #endif
4261 	/*
4262 	 * Distribute any remainder till they are all gone
4263 	 */
4264 	remainder = target_threshold - thresh * transitions;
4265 	level = 1;
4266 #ifdef DEBUG
4267 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4268 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4269 	    transitions))
4270 #endif
4271 	while (remainder > 0) {
4272 		comp = 0;
4273 		while (remainder && (comp < ncomp)) {
4274 			pmc = &PM_CP(dip, comp)->pmc_comp;
4275 			if (level < pmc->pmc_numlevels) {
4276 				pmc->pmc_thresh[level] += 1;
4277 				remainder--;
4278 			}
4279 			comp++;
4280 		}
4281 		level++;
4282 	}
4283 #ifdef DEBUG
4284 	for (comp = 0; comp < ncomp; comp++) {
4285 		pmc = &PM_CP(dip, comp)->pmc_comp;
4286 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4287 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4288 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4289 			    comp, level, pmc->pmc_thresh[level]))
4290 		}
4291 	}
4292 #endif
4293 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4294 	DEVI(dip)->devi_pm_dev_thresh = base;
4295 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4296 	DEVI(dip)->devi_pm_flags |= flag;
4297 	PM_UNLOCK_DIP(dip);
4298 }
4299 
4300 /*
4301  * Called when there is no old-style platform power management driver
4302  */
4303 static int
4304 ddi_no_platform_power(power_req_t *req)
4305 {
4306 	_NOTE(ARGUNUSED(req))
4307 	return (DDI_FAILURE);
4308 }
4309 
4310 /*
4311  * This function calls the entry point supplied by the platform-specific
4312  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4313  * The use of global for getting the  function name from platform-specific
4314  * pm driver is not ideal, but it is simple and efficient.
4315  * The previous property lookup was being done in the idle loop on swift
4316  * systems without pmc chips and hurt deskbench performance as well as
4317  * violating scheduler locking rules
4318  */
4319 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4320 
4321 /*
4322  * Old obsolete interface for a device to request a power change (but only
4323  * an increase in power)
4324  */
4325 int
4326 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4327 {
4328 	return (pm_raise_power(dip, cmpt, level));
4329 }
4330 
4331 /*
4332  * The old obsolete interface to platform power management.  Only used by
4333  * Gypsy platform and APM on X86.
4334  */
4335 int
4336 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4337 {
4338 	power_req_t	request;
4339 
4340 	request.request_type = PMR_SET_POWER;
4341 	request.req.set_power_req.who = dip;
4342 	request.req.set_power_req.cmpt = pm_cmpt;
4343 	request.req.set_power_req.level = pm_level;
4344 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4345 }
4346 
4347 /*
4348  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4349  * passed.  Returns true if subsequent processing could result in power being
4350  * removed from the device.  The arg is not currently used because it is
4351  * implicit in the operation of cpr/DR.
4352  */
4353 int
4354 ddi_removing_power(dev_info_t *dip)
4355 {
4356 	_NOTE(ARGUNUSED(dip))
4357 	return (pm_powering_down);
4358 }
4359 
4360 /*
4361  * Returns true if a device indicates that its parent handles suspend/resume
4362  * processing for it.
4363  */
4364 int
4365 e_ddi_parental_suspend_resume(dev_info_t *dip)
4366 {
4367 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4368 }
4369 
4370 /*
4371  * Called for devices which indicate that their parent does suspend/resume
4372  * handling for them
4373  */
4374 int
4375 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4376 {
4377 	power_req_t	request;
4378 	request.request_type = PMR_SUSPEND;
4379 	request.req.suspend_req.who = dip;
4380 	request.req.suspend_req.cmd = cmd;
4381 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4382 }
4383 
4384 /*
4385  * Called for devices which indicate that their parent does suspend/resume
4386  * handling for them
4387  */
4388 int
4389 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4390 {
4391 	power_req_t	request;
4392 	request.request_type = PMR_RESUME;
4393 	request.req.resume_req.who = dip;
4394 	request.req.resume_req.cmd = cmd;
4395 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4396 }
4397 
4398 /*
4399  * Old obsolete exported interface for drivers to create components.
4400  * This is now handled by exporting the pm-components property.
4401  */
4402 int
4403 pm_create_components(dev_info_t *dip, int num_components)
4404 {
4405 	PMD_FUNC(pmf, "pm_create_components")
4406 
4407 	if (num_components < 1)
4408 		return (DDI_FAILURE);
4409 
4410 	if (!DEVI_IS_ATTACHING(dip)) {
4411 		return (DDI_FAILURE);
4412 	}
4413 
4414 	/* don't need to lock dip because attach is single threaded */
4415 	if (DEVI(dip)->devi_pm_components) {
4416 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4417 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4418 		return (DDI_FAILURE);
4419 	}
4420 	e_pm_create_components(dip, num_components);
4421 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4422 	e_pm_default_components(dip, num_components);
4423 	return (DDI_SUCCESS);
4424 }
4425 
4426 /*
4427  * Obsolete interface previously called by drivers to destroy their components
4428  * at detach time.  This is now done automatically.  However, we need to keep
4429  * this for the old drivers.
4430  */
4431 void
4432 pm_destroy_components(dev_info_t *dip)
4433 {
4434 	PMD_FUNC(pmf, "pm_destroy_components")
4435 	dev_info_t *pdip = ddi_get_parent(dip);
4436 
4437 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4438 	    PM_DEVICE(dip)))
4439 	ASSERT(DEVI_IS_DETACHING(dip));
4440 #ifdef DEBUG
4441 	if (!PM_ISBC(dip))
4442 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4443 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4444 		    PM_ADDR(dip));
4445 #endif
4446 	/*
4447 	 * We ignore this unless this is an old-style driver, except for
4448 	 * printing the message above
4449 	 */
4450 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4451 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4452 		    PM_DEVICE(dip)))
4453 		return;
4454 	}
4455 	ASSERT(PM_GET_PM_INFO(dip));
4456 
4457 	/*
4458 	 * pm_unmanage will clear info pointer later, after dealing with
4459 	 * dependencies
4460 	 */
4461 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4462 	/*
4463 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4464 	 * Parents that get notification are not adjusted because their
4465 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4466 	 */
4467 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4468 		pm_rele_power(pdip);
4469 #ifdef DEBUG
4470 	else {
4471 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4472 		    pmf, PM_DEVICE(dip)))
4473 	}
4474 #endif
4475 	e_pm_destroy_components(dip);
4476 	/*
4477 	 * Forget we ever knew anything about the components of this  device
4478 	 */
4479 	DEVI(dip)->devi_pm_flags &=
4480 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4481 }
4482 
4483 /*
4484  * Exported interface for a driver to set a component busy.
4485  */
4486 int
4487 pm_busy_component(dev_info_t *dip, int cmpt)
4488 {
4489 	struct pm_component *cp;
4490 
4491 	ASSERT(dip != NULL);
4492 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4493 		return (DDI_FAILURE);
4494 	PM_LOCK_BUSY(dip);
4495 	cp->pmc_busycount++;
4496 	cp->pmc_timestamp = 0;
4497 	PM_UNLOCK_BUSY(dip);
4498 	return (DDI_SUCCESS);
4499 }
4500 
4501 /*
4502  * Exported interface for a driver to set a component idle.
4503  */
4504 int
4505 pm_idle_component(dev_info_t *dip, int cmpt)
4506 {
4507 	PMD_FUNC(pmf, "pm_idle_component")
4508 	struct pm_component *cp;
4509 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4510 
4511 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4512 		return (DDI_FAILURE);
4513 
4514 	PM_LOCK_BUSY(dip);
4515 	if (cp->pmc_busycount) {
4516 		if (--(cp->pmc_busycount) == 0)
4517 			cp->pmc_timestamp = gethrestime_sec();
4518 	} else {
4519 		cp->pmc_timestamp = gethrestime_sec();
4520 	}
4521 
4522 	PM_UNLOCK_BUSY(dip);
4523 
4524 	/*
4525 	 * if device becomes idle during idle down period, try scan it down
4526 	 */
4527 	if (scanp && PM_IS_PID(dip)) {
4528 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4529 		    PM_DEVICE(dip)))
4530 		pm_rescan(dip);
4531 		return (DDI_SUCCESS);
4532 	}
4533 
4534 	/*
4535 	 * handle scan not running with nexus threshold == 0
4536 	 */
4537 
4538 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4539 		pm_rescan(dip);
4540 	}
4541 
4542 	return (DDI_SUCCESS);
4543 }
4544 
4545 /*
4546  * This is the old  obsolete interface called by drivers to set their normal
4547  * power.  Thus we can't fix its behavior or return a value.
4548  * This functionality is replaced by the pm-component property.
4549  * We'll only get components destroyed while no power management is
4550  * going on (and the device is detached), so we don't need a mutex here
4551  */
4552 void
4553 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4554 {
4555 	PMD_FUNC(pmf, "set_normal_power")
4556 #ifdef DEBUG
4557 	if (!PM_ISBC(dip))
4558 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4559 		    "(driver exporting pm-components property) ignored",
4560 		    PM_NAME(dip), PM_ADDR(dip));
4561 #endif
4562 	if (PM_ISBC(dip)) {
4563 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4564 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4565 		e_pm_set_max_power(dip, comp, level);
4566 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4567 	}
4568 }
4569 
4570 /*
4571  * Called on a successfully detached driver to free pm resources
4572  */
4573 static void
4574 pm_stop(dev_info_t *dip)
4575 {
4576 	PMD_FUNC(pmf, "stop")
4577 	dev_info_t *pdip = ddi_get_parent(dip);
4578 
4579 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4580 	/* stopping scan, destroy scan data structure */
4581 	if (!PM_ISBC(dip)) {
4582 		pm_scan_stop(dip);
4583 		pm_scan_fini(dip);
4584 	}
4585 
4586 	if (PM_GET_PM_INFO(dip) != NULL) {
4587 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4588 			/*
4589 			 * Old style driver may have called
4590 			 * pm_destroy_components already, but just in case ...
4591 			 */
4592 			e_pm_destroy_components(dip);
4593 		} else {
4594 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4595 			    pmf, PM_DEVICE(dip)))
4596 		}
4597 	} else {
4598 		if (PM_NUMCMPTS(dip))
4599 			e_pm_destroy_components(dip);
4600 		else {
4601 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4602 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4603 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4604 					pm_rele_power(pdip);
4605 				} else if (pdip &&
4606 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4607 					(void) mdi_power(pdip,
4608 					    MDI_PM_RELE_POWER,
4609 					    (void *)dip, NULL, 0);
4610 				}
4611 			}
4612 		}
4613 	}
4614 }
4615 
4616 /*
4617  * The node is the subject of a reparse pm props ioctl. Throw away the old
4618  * info and start over.
4619  */
4620 int
4621 e_new_pm_props(dev_info_t *dip)
4622 {
4623 	if (PM_GET_PM_INFO(dip) != NULL) {
4624 		pm_stop(dip);
4625 
4626 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4627 			return (DDI_FAILURE);
4628 		}
4629 	}
4630 	e_pm_props(dip);
4631 	return (DDI_SUCCESS);
4632 }
4633 
4634 /*
4635  * Device has been attached, so process its pm properties
4636  */
4637 void
4638 e_pm_props(dev_info_t *dip)
4639 {
4640 	char *pp;
4641 	int len;
4642 	int flags = 0;
4643 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4644 
4645 	/*
4646 	 * It doesn't matter if we do this more than once, we should always
4647 	 * get the same answers, and if not, then the last one in is the
4648 	 * best one.
4649 	 */
4650 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4651 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4652 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4653 			flags = PMC_NEEDS_SR;
4654 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4655 			flags = PMC_NO_SR;
4656 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4657 			flags = PMC_PARENTAL_SR;
4658 		} else {
4659 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4660 			    "%s property value '%s'", PM_NAME(dip),
4661 			    PM_ADDR(dip), "pm-hardware-state", pp);
4662 		}
4663 		kmem_free(pp, len);
4664 	}
4665 	/*
4666 	 * This next segment (PMC_WANTS_NOTIFY) is in
4667 	 * support of nexus drivers which will want to be involved in
4668 	 * (or at least notified of) their child node's power level transitions.
4669 	 * "pm-want-child-notification?" is defined by the parent.
4670 	 */
4671 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4672 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4673 		flags |= PMC_WANTS_NOTIFY;
4674 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4675 	    dip, propflag, "pm-want-child-notification?"));
4676 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4677 	    "no-involuntary-power-cycles"))
4678 		flags |= PMC_NO_INVOL;
4679 	/*
4680 	 * Is the device a CPU device?
4681 	 */
4682 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4683 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4684 		if (strcmp(pp, "CPU") == 0) {
4685 			flags |= PMC_CPU_DEVICE;
4686 		} else {
4687 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4688 			    "%s property value '%s'", PM_NAME(dip),
4689 			PM_ADDR(dip), "pm-class", pp);
4690 		}
4691 		kmem_free(pp, len);
4692 	}
4693 	/* devfs single threads us */
4694 	DEVI(dip)->devi_pm_flags |= flags;
4695 }
4696 
/*
 * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
 * driver which has claimed a node.
 *
 *	dip	expected to be NULL (asserted); otherwise unused
 *	rdip	device the request is about
 *	ctlop	only DDI_CTLOPS_POWER is handled
 *	arg	the request, a power_req_t
 *	result	written only for PMR_PPM_TRY_LOCK_POWER, as an int
 *
 * For PMR_PPM_SET_POWER the component's current level is stored in the
 * request's old_level field before the change is attempted.
 *
 * Returns DDI_FAILURE for any other ctlop or an unrecognized request type;
 * for PMR_PPM_SET_POWER returns whatever pm_power() returns; otherwise
 * returns DDI_SUCCESS.
 */
static int
pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result)
{
	_NOTE(ARGUNUSED(dip))
	PMD_FUNC(pmf, "ctlops")
	power_req_t *reqp = (power_req_t *)arg;
	int retval;
	dev_info_t *target_dip;
	int new_level, old_level, cmpt;
#ifdef PMDDEBUG
	char *format;
#endif

	/*
	 * The interface for doing the actual power level changes is now
	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
	 * different platform-specific power control drivers.
	 *
	 * This driver implements the "default" version of this interface.
	 * If no ppm driver has been installed then this interface is called
	 * instead.
	 */
	ASSERT(dip == NULL);
	switch (ctlop) {
	case DDI_CTLOPS_POWER:
		switch (reqp->request_type) {
		case PMR_PPM_SET_POWER:
		{
			/* change the level of one component of rdip */
			target_dip = reqp->req.ppm_set_power_req.who;
			ASSERT(target_dip == rdip);
			new_level = reqp->req.ppm_set_power_req.new_level;
			cmpt = reqp->req.ppm_set_power_req.cmpt;
			/* pass back old power for the PM_LEVEL_UNKNOWN case */
			old_level = PM_CURPOWER(target_dip, cmpt);
			reqp->req.ppm_set_power_req.old_level = old_level;
			retval = pm_power(target_dip, cmpt, new_level);
			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
			    old_level, new_level, (retval == DDI_SUCCESS ?
			    "chd" : "no chg")))
			return (retval);
		}

		/*
		 * Configuration notifications need no action here; they
		 * are only traced (when PMDDEBUG is defined) and succeed.
		 */
		case PMR_PPM_PRE_DETACH:
		case PMR_PPM_POST_DETACH:
		case PMR_PPM_PRE_ATTACH:
		case PMR_PPM_POST_ATTACH:
		case PMR_PPM_PRE_PROBE:
		case PMR_PPM_POST_PROBE:
		case PMR_PPM_PRE_RESUME:
		case PMR_PPM_INIT_CHILD:
		case PMR_PPM_UNINIT_CHILD:
#ifdef PMDDEBUG
			/* pick the trace format that names the request */
			switch (reqp->request_type) {
				case PMR_PPM_PRE_DETACH:
					format = "%s: PMR_PPM_PRE_DETACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_DETACH:
					format = "%s: PMR_PPM_POST_DETACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_ATTACH:
					format = "%s: PMR_PPM_PRE_ATTACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_ATTACH:
					format = "%s: PMR_PPM_POST_ATTACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_PROBE:
					format = "%s: PMR_PPM_PRE_PROBE "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_PROBE:
					format = "%s: PMR_PPM_POST_PROBE "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_RESUME:
					format = "%s: PMR_PPM_PRE_RESUME "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_INIT_CHILD:
					format = "%s: PMR_PPM_INIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_UNINIT_CHILD:
					format = "%s: PMR_PPM_UNINIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				default:
					break;
			}
			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
			    reqp->req.ppm_config_req.result))
#endif
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_CHANGE_NOTIFY:
			/*
			 * Nothing for us to do
			 */
			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
			    reqp->req.ppm_notify_level_req.cmpt,
			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
			    reqp->req.ppm_notify_level_req.cmpt),
			    reqp->req.ppm_notify_level_req.new_level))
			return (DDI_SUCCESS);

		case PMR_PPM_UNMANAGE:
			/* traced only; nothing to undo here */
			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(rdip)))
			return (DDI_SUCCESS);

		/*
		 * The power lock requests are passed straight to the
		 * corresponding single-node lock routines.
		 */
		case PMR_PPM_LOCK_POWER:
			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_UNLOCK_POWER:
			pm_unlock_power_single(
			    reqp->req.ppm_unlock_power_req.who,
			    reqp->req.ppm_unlock_power_req.circ);
			return (DDI_SUCCESS);

		case PMR_PPM_TRY_LOCK_POWER:
			/* outcome of the attempt goes back through result */
			*(int *)result = pm_try_locking_power_single(
			    reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_LOCK_OWNER:
			/* report the thread recorded in devi_busy_thread */
			target_dip = reqp->req.ppm_power_lock_owner_req.who;
			ASSERT(target_dip == rdip);
			reqp->req.ppm_power_lock_owner_req.owner =
			    DEVI(rdip)->devi_busy_thread;
			return (DDI_SUCCESS);
		default:
			PMD(PMD_ERROR, ("%s: default!\n", pmf))
			return (DDI_FAILURE);
		}

	default:
		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
		return (DDI_FAILURE);
	}
}
4853 
4854 /*
4855  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4856  * power_ops struct for this functionality instead?
4857  * However, we only ever do this on a ppm driver.
4858  */
4859 int
4860 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4861 {
4862 	int (*fp)();
4863 
4864 	/* if no ppm handler, call the default routine */
4865 	if (d == NULL) {
4866 		return (pm_default_ctlops(d, r, op, a, v));
4867 	}
4868 	if (!d || !r)
4869 		return (DDI_FAILURE);
4870 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4871 		DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4872 
4873 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4874 	return ((*fp)(d, r, op, a, v));
4875 }
4876 
4877 /*
4878  * Called on a node when attach completes or the driver makes its first pm
4879  * call (whichever comes first).
4880  * In the attach case, device may not be power manageable at all.
4881  * Don't need to lock the dip because we're single threaded by the devfs code
4882  */
4883 static int
4884 pm_start(dev_info_t *dip)
4885 {
4886 	PMD_FUNC(pmf, "start")
4887 	int ret;
4888 	dev_info_t *pdip = ddi_get_parent(dip);
4889 	int e_pm_manage(dev_info_t *, int);
4890 	void pm_noinvol_specd(dev_info_t *dip);
4891 
4892 	e_pm_props(dip);
4893 	pm_noinvol_specd(dip);
4894 	/*
4895 	 * If this dip has already been processed, don't mess with it
4896 	 * (but decrement the speculative count we did above, as whatever
4897 	 * code put it under pm already will have dealt with it)
4898 	 */
4899 	if (PM_GET_PM_INFO(dip)) {
4900 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4901 		    pmf, PM_DEVICE(dip)))
4902 		return (0);
4903 	}
4904 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4905 
4906 	if (PM_GET_PM_INFO(dip) == NULL) {
4907 		/*
4908 		 * keep the kidsupcount increment as is
4909 		 */
4910 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4911 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4912 			pm_hold_power(pdip);
4913 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4914 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4915 			    (void *)dip, NULL, 0);
4916 		}
4917 
4918 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4919 		    "left up\n", pmf, PM_DEVICE(dip)))
4920 	}
4921 
4922 	return (ret);
4923 }
4924 
4925 /*
4926  * Keep a list of recorded thresholds.  For now we just keep a list and
4927  * search it linearly.  We don't expect too many entries.  Can always hash it
4928  * later if we need to.
4929  */
4930 void
4931 pm_record_thresh(pm_thresh_rec_t *rp)
4932 {
4933 	pm_thresh_rec_t *pptr, *ptr;
4934 
4935 	ASSERT(*rp->ptr_physpath);
4936 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4937 	for (pptr = NULL, ptr = pm_thresh_head;
4938 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4939 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4940 			/* replace this one */
4941 			rp->ptr_next = ptr->ptr_next;
4942 			if (pptr) {
4943 				pptr->ptr_next = rp;
4944 			} else {
4945 				pm_thresh_head = rp;
4946 			}
4947 			rw_exit(&pm_thresh_rwlock);
4948 			kmem_free(ptr, ptr->ptr_size);
4949 			return;
4950 		}
4951 		continue;
4952 	}
4953 	/*
4954 	 * There was not a match in the list, insert this one in front
4955 	 */
4956 	if (pm_thresh_head) {
4957 		rp->ptr_next = pm_thresh_head;
4958 		pm_thresh_head = rp;
4959 	} else {
4960 		rp->ptr_next = NULL;
4961 		pm_thresh_head = rp;
4962 	}
4963 	rw_exit(&pm_thresh_rwlock);
4964 }
4965 
4966 /*
4967  * Create a new dependency record and hang a new dependency entry off of it
4968  */
4969 pm_pdr_t *
4970 newpdr(char *kept, char *keeps, int isprop)
4971 {
4972 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
4973 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
4974 	p->pdr_size = size;
4975 	p->pdr_isprop = isprop;
4976 	p->pdr_kept_paths = NULL;
4977 	p->pdr_kept_count = 0;
4978 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
4979 	(void) strcpy(p->pdr_kept, kept);
4980 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
4981 	(void) strcpy(p->pdr_keeper, keeps);
4982 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
4983 	    (intptr_t)p + size);
4984 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
4985 	    (intptr_t)p + size);
4986 	return (p);
4987 }
4988 
4989 /*
4990  * Keep a list of recorded dependencies.  We only keep the
4991  * keeper -> kept list for simplification. At this point We do not
4992  * care about whether the devices are attached or not yet,
4993  * this would be done in pm_keeper() and pm_kept().
4994  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
4995  * and it is up to the user to issue the ioctl again if they want it
4996  * (e.g. pmconfig)
4997  * Returns true if dependency already exists in the list.
4998  */
4999 int
5000 pm_record_keeper(char *kept, char *keeper, int isprop)
5001 {
5002 	PMD_FUNC(pmf, "record_keeper")
5003 	pm_pdr_t *npdr, *ppdr, *pdr;
5004 
5005 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5006 	ASSERT(kept && keeper);
5007 #ifdef DEBUG
5008 	if (pm_debug & PMD_KEEPS)
5009 		prdeps("pm_record_keeper entry");
5010 #endif
5011 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5012 	    ppdr = pdr, pdr = pdr->pdr_next) {
5013 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5014 		    pdr->pdr_keeper))
5015 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5016 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5017 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5018 			return (1);
5019 		}
5020 	}
5021 	/*
5022 	 * We did not find any match, so we have to make an entry
5023 	 */
5024 	npdr = newpdr(kept, keeper, isprop);
5025 	if (ppdr) {
5026 		ASSERT(ppdr->pdr_next == NULL);
5027 		ppdr->pdr_next = npdr;
5028 	} else {
5029 		ASSERT(pm_dep_head == NULL);
5030 		pm_dep_head = npdr;
5031 	}
5032 #ifdef DEBUG
5033 	if (pm_debug & PMD_KEEPS)
5034 		prdeps("pm_record_keeper after new record");
5035 #endif
5036 	if (!isprop)
5037 		pm_unresolved_deps++;
5038 	else
5039 		pm_prop_deps++;
5040 	return (0);
5041 }
5042 
5043 /*
5044  * Look up this device in the set of devices we've seen ioctls for
5045  * to see if we are holding a threshold spec for it.  If so, make it so.
5046  * At ioctl time, we were given the physical path of the device.
5047  */
5048 int
5049 pm_thresh_specd(dev_info_t *dip)
5050 {
5051 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5052 	char *path = 0;
5053 	char pathbuf[MAXNAMELEN];
5054 	pm_thresh_rec_t *rp;
5055 
5056 	path = ddi_pathname(dip, pathbuf);
5057 
5058 	rw_enter(&pm_thresh_rwlock, RW_READER);
5059 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5060 		if (strcmp(rp->ptr_physpath, path) != 0)
5061 			continue;
5062 		pm_apply_recorded_thresh(dip, rp);
5063 		rw_exit(&pm_thresh_rwlock);
5064 		return (1);
5065 	}
5066 	rw_exit(&pm_thresh_rwlock);
5067 	return (0);
5068 }
5069 
5070 static int
5071 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5072 {
5073 	PMD_FUNC(pmf, "set_keeping")
5074 	pm_info_t *kept_info;
5075 	int j, up = 0, circ;
5076 	void prdeps(char *);
5077 
5078 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5079 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5080 #ifdef DEBUG
5081 	if (pm_debug & PMD_KEEPS)
5082 		prdeps("Before PAD\n");
5083 #endif
5084 	ASSERT(keeper != kept);
5085 	if (PM_GET_PM_INFO(keeper) == NULL) {
5086 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5087 		    "%s@%s(%s#%d), but the latter is not power managed",
5088 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5089 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5090 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5091 		return (0);
5092 	}
5093 	kept_info = PM_GET_PM_INFO(kept);
5094 	ASSERT(kept_info);
5095 	PM_LOCK_POWER(keeper, &circ);
5096 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5097 		if (PM_CURPOWER(keeper, j)) {
5098 			up++;
5099 			break;
5100 		}
5101 	}
5102 	if (up) {
5103 		/* Bringup and maintain a hold on the kept */
5104 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5105 		    PM_DEVICE(kept)))
5106 		bring_pmdep_up(kept, 1);
5107 	}
5108 	PM_UNLOCK_POWER(keeper, circ);
5109 #ifdef DEBUG
5110 	if (pm_debug & PMD_KEEPS)
5111 		prdeps("After PAD\n");
5112 #endif
5113 	return (1);
5114 }
5115 
5116 /*
5117  * Should this device keep up another device?
5118  * Look up this device in the set of devices we've seen ioctls for
5119  * to see if we are holding a dependency spec for it.  If so, make it so.
5120  * Because we require the kept device to be attached already in order to
5121  * make the list entry (and hold it), we only need to look for keepers.
5122  * At ioctl time, we were given the physical path of the device.
5123  */
5124 int
5125 pm_keeper(char *keeper)
5126 {
5127 	PMD_FUNC(pmf, "keeper")
5128 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5129 	dev_info_t *dip;
5130 	pm_pdr_t *dp;
5131 	dev_info_t *kept = NULL;
5132 	int ret = 0;
5133 	int i;
5134 
5135 	if (!pm_unresolved_deps && !pm_prop_deps)
5136 		return (0);
5137 	ASSERT(keeper != NULL);
5138 	dip = pm_name_to_dip(keeper, 1);
5139 	if (dip == NULL)
5140 		return (0);
5141 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5142 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5143 		if (!dp->pdr_isprop) {
5144 			if (!pm_unresolved_deps)
5145 				continue;
5146 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5147 			if (dp->pdr_satisfied) {
5148 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5149 				continue;
5150 			}
5151 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5152 				ret += pm_apply_recorded_dep(dip, dp);
5153 			}
5154 		} else {
5155 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5156 				continue;
5157 			for (i = 0; i < dp->pdr_kept_count; i++) {
5158 				if (dp->pdr_kept_paths[i] == NULL)
5159 					continue;
5160 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5161 				if (kept == NULL)
5162 					continue;
5163 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5164 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5165 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5166 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5167 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5168 				    dp->pdr_kept_count))
5169 				if (kept != dip) {
5170 					ret += pm_set_keeping(dip, kept);
5171 				}
5172 				ddi_release_devi(kept);
5173 			}
5174 
5175 		}
5176 	}
5177 	ddi_release_devi(dip);
5178 	return (ret);
5179 }
5180 
5181 /*
5182  * Should this device be kept up by another device?
5183  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5184  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5185  * kept device lists.
5186  */
5187 static int
5188 pm_kept(char *keptp)
5189 {
5190 	PMD_FUNC(pmf, "kept")
5191 	pm_pdr_t *dp;
5192 	int found = 0;
5193 	int ret = 0;
5194 	dev_info_t *keeper;
5195 	dev_info_t *kept;
5196 	size_t length;
5197 	int i;
5198 	char **paths;
5199 	char *path;
5200 
5201 	ASSERT(keptp != NULL);
5202 	kept = pm_name_to_dip(keptp, 1);
5203 	if (kept == NULL)
5204 		return (0);
5205 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5206 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5207 		if (dp->pdr_isprop) {
5208 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5209 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5210 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5211 				/*
5212 				 * Dont allow self dependency.
5213 				 */
5214 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5215 					continue;
5216 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5217 				if (keeper == NULL)
5218 					continue;
5219 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5220 				    "%p\n", pmf, (void *)kept))
5221 #ifdef DEBUG
5222 				if (pm_debug & PMD_DEP)
5223 					prdeps("Before Adding from pm_kept\n");
5224 #endif
5225 				/*
5226 				 * Add ourselves to the dip list.
5227 				 */
5228 				if (dp->pdr_kept_count == 0) {
5229 					length = strlen(keptp) + 1;
5230 					path =
5231 					    kmem_alloc(length, KM_SLEEP);
5232 					paths = kmem_alloc(sizeof (char **),
5233 						    KM_SLEEP);
5234 					(void) strcpy(path, keptp);
5235 					paths[0] = path;
5236 					dp->pdr_kept_paths = paths;
5237 					dp->pdr_kept_count++;
5238 				} else {
5239 					/* Check to see if already on list */
5240 					for (i = 0; i < dp->pdr_kept_count;
5241 					    i++) {
5242 						if (strcmp(keptp,
5243 						    dp->pdr_kept_paths[i])
5244 						    == 0) {
5245 							found++;
5246 							break;
5247 						}
5248 					}
5249 					if (found) {
5250 						ddi_release_devi(keeper);
5251 						continue;
5252 					}
5253 					length = dp->pdr_kept_count *
5254 					    sizeof (char **);
5255 					paths = kmem_alloc(
5256 					    length + sizeof (char **),
5257 					    KM_SLEEP);
5258 					if (dp->pdr_kept_count) {
5259 						bcopy(dp->pdr_kept_paths,
5260 						    paths, length);
5261 						kmem_free(dp->pdr_kept_paths,
5262 							length);
5263 					}
5264 					dp->pdr_kept_paths = paths;
5265 					length = strlen(keptp) + 1;
5266 					path =
5267 					    kmem_alloc(length, KM_SLEEP);
5268 					(void) strcpy(path, keptp);
5269 					dp->pdr_kept_paths[i] = path;
5270 					dp->pdr_kept_count++;
5271 				}
5272 #ifdef DEBUG
5273 				if (pm_debug & PMD_DEP)
5274 					prdeps("After from pm_kept\n");
5275 #endif
5276 				if (keeper) {
5277 					ret += pm_set_keeping(keeper, kept);
5278 					ddi_release_devi(keeper);
5279 				}
5280 			}
5281 		} else {
5282 			/*
5283 			 * pm_keeper would be called later to do
5284 			 * the actual pm_set_keeping.
5285 			 */
5286 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5287 			    pmf, (void *)kept))
5288 #ifdef DEBUG
5289 			if (pm_debug & PMD_DEP)
5290 				prdeps("Before Adding from pm_kept\n");
5291 #endif
5292 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5293 				if (dp->pdr_kept_paths == NULL) {
5294 					length = strlen(keptp) + 1;
5295 					path =
5296 					    kmem_alloc(length, KM_SLEEP);
5297 					paths = kmem_alloc(sizeof (char **),
5298 						KM_SLEEP);
5299 					(void) strcpy(path, keptp);
5300 					paths[0] = path;
5301 					dp->pdr_kept_paths = paths;
5302 					dp->pdr_kept_count++;
5303 				}
5304 			}
5305 #ifdef DEBUG
5306 			if (pm_debug & PMD_DEP)
5307 			    prdeps("After from pm_kept\n");
5308 #endif
5309 		}
5310 	}
5311 	ddi_release_devi(kept);
5312 	return (ret);
5313 }
5314 
5315 /*
5316  * Apply a recorded dependency.  dp specifies the dependency, and
5317  * keeper is already known to be the device that keeps up the other (kept) one.
5318  * We have to the whole tree for the "kept" device, then apply
5319  * the dependency (which may already be applied).
5320  */
5321 int
5322 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5323 {
5324 	PMD_FUNC(pmf, "apply_recorded_dep")
5325 	dev_info_t *kept = NULL;
5326 	int ret = 0;
5327 	char *keptp = NULL;
5328 
5329 	/*
5330 	 * Device to Device dependency can only be 1 to 1.
5331 	 */
5332 	if (dp->pdr_kept_paths == NULL)
5333 		return (0);
5334 	keptp = dp->pdr_kept_paths[0];
5335 	if (keptp == NULL)
5336 		return (0);
5337 	ASSERT(*keptp != '\0');
5338 	kept = pm_name_to_dip(keptp, 1);
5339 	if (kept == NULL)
5340 		return (0);
5341 	if (kept) {
5342 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5343 		    dp->pdr_keeper, keptp))
5344 		if (pm_set_keeping(keeper, kept)) {
5345 			ASSERT(dp->pdr_satisfied == 0);
5346 			dp->pdr_satisfied = 1;
5347 			ASSERT(pm_unresolved_deps);
5348 			pm_unresolved_deps--;
5349 			ret++;
5350 		}
5351 	}
5352 	ddi_release_devi(kept);
5353 
5354 	return (ret);
5355 }
5356 
5357 /*
5358  * Called from common/io/pm.c
5359  */
5360 int
5361 pm_cur_power(pm_component_t *cp)
5362 {
5363 	return (cur_power(cp));
5364 }
5365 
5366 /*
5367  * External interface to sanity-check a power level.
5368  */
5369 int
5370 pm_valid_power(dev_info_t *dip, int comp, int level)
5371 {
5372 	PMD_FUNC(pmf, "valid_power")
5373 
5374 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5375 		return (e_pm_valid_power(dip, comp, level));
5376 	else {
5377 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5378 		    pmf, comp, PM_NUMCMPTS(dip), level))
5379 		return (0);
5380 	}
5381 }
5382 
5383 /*
5384  * Called when a device that is direct power managed needs to change state.
5385  * This routine arranges to block the request until the process managing
5386  * the device makes the change (or some other incompatible change) or
5387  * the process closes /dev/pm.
5388  */
5389 static int
5390 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5391 {
5392 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5393 	int ret = 0;
5394 	void pm_dequeue_blocked(pm_rsvp_t *);
5395 	void pm_enqueue_blocked(pm_rsvp_t *);
5396 
5397 	ASSERT(!pm_processes_stopped);
5398 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5399 	new->pr_dip = dip;
5400 	new->pr_comp = comp;
5401 	new->pr_newlevel = newpower;
5402 	new->pr_oldlevel = oldpower;
5403 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5404 	mutex_enter(&pm_rsvp_lock);
5405 	pm_enqueue_blocked(new);
5406 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5407 	    PM_CANBLOCK_BLOCK);
5408 	PM_UNLOCK_DIP(dip);
5409 	/*
5410 	 * truss may make the cv_wait_sig return prematurely
5411 	 */
5412 	while (ret == 0) {
5413 		/*
5414 		 * Normally there will be no user context involved, but if
5415 		 * there is (e.g. we are here via an ioctl call to a driver)
5416 		 * then we should allow the process to abort the request,
5417 		 * or we get an unkillable process if the same thread does
5418 		 * PM_DIRECT_PM and pm_raise_power
5419 		 */
5420 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5421 			ret = PMP_FAIL;
5422 		} else {
5423 			ret = new->pr_retval;
5424 		}
5425 	}
5426 	pm_dequeue_blocked(new);
5427 	mutex_exit(&pm_rsvp_lock);
5428 	cv_destroy(&new->pr_cv);
5429 	kmem_free(new, sizeof (*new));
5430 	return (ret);
5431 }
5432 
5433 /*
5434  * Returns true if the process is interested in power level changes (has issued
5435  * PM_GET_STATE_CHANGE ioctl).
5436  */
5437 int
5438 pm_interest_registered(int clone)
5439 {
5440 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5441 	return (pm_interest[clone]);
5442 }
5443 
5444 /*
5445  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5446  * watch all state transitions (dip == NULL).  Set up data
5447  * structs to communicate with process about state changes.
5448  */
5449 void
5450 pm_register_watcher(int clone, dev_info_t *dip)
5451 {
5452 	pscc_t	*p;
5453 	psce_t	*psce;
5454 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5455 
5456 	/*
5457 	 * We definitely need a control struct, then we have to search to see
5458 	 * there is already an entries struct (in the dip != NULL case).
5459 	 */
5460 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5461 	pscc->pscc_clone = clone;
5462 	pscc->pscc_dip = dip;
5463 
5464 	if (dip) {
5465 		int found = 0;
5466 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5467 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5468 			/*
5469 			 * Already an entry for this clone, so just use it
5470 			 * for the new one (for the case where a single
5471 			 * process is watching multiple devices)
5472 			 */
5473 			if (p->pscc_clone == clone) {
5474 				ASSERT(p->pscc_dip != dip);
5475 				pscc->pscc_entries = p->pscc_entries;
5476 				pscc->pscc_entries->psce_references++;
5477 				found++;
5478 			}
5479 		}
5480 		if (!found) {		/* create a new one */
5481 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5482 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5483 			psce->psce_first =
5484 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5485 			    KM_SLEEP);
5486 			psce->psce_in = psce->psce_out = psce->psce_first;
5487 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5488 			psce->psce_references = 1;
5489 			pscc->pscc_entries = psce;
5490 		}
5491 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5492 		rw_exit(&pm_pscc_direct_rwlock);
5493 	} else {
5494 		ASSERT(!pm_interest_registered(clone));
5495 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5496 #ifdef DEBUG
5497 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5498 			/*
5499 			 * Should not be an entry for this clone!
5500 			 */
5501 			ASSERT(p->pscc_clone != clone);
5502 		}
5503 #endif
5504 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5505 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5506 		    PSCCOUNT, KM_SLEEP);
5507 		psce->psce_in = psce->psce_out = psce->psce_first;
5508 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5509 		psce->psce_references = 1;
5510 		pscc->pscc_entries = psce;
5511 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5512 		pm_interest[clone] = 1;
5513 		rw_exit(&pm_pscc_interest_rwlock);
5514 	}
5515 }
5516 
5517 /*
5518  * Remove the given entry from the blocked list
5519  */
5520 void
5521 pm_dequeue_blocked(pm_rsvp_t *p)
5522 {
5523 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5524 	if (pm_blocked_list == p) {
5525 		ASSERT(p->pr_prev == NULL);
5526 		if (p->pr_next != NULL)
5527 			p->pr_next->pr_prev = NULL;
5528 		pm_blocked_list = p->pr_next;
5529 	} else {
5530 		ASSERT(p->pr_prev != NULL);
5531 		p->pr_prev->pr_next = p->pr_next;
5532 		if (p->pr_next != NULL)
5533 			p->pr_next->pr_prev = p->pr_prev;
5534 	}
5535 }
5536 
5537 /*
5538  * Remove the given control struct from the given list
5539  */
5540 static void
5541 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5542 {
5543 	if (*list == p) {
5544 		ASSERT(p->pscc_prev == NULL);
5545 		if (p->pscc_next != NULL)
5546 			p->pscc_next->pscc_prev = NULL;
5547 		*list = p->pscc_next;
5548 	} else {
5549 		ASSERT(p->pscc_prev != NULL);
5550 		p->pscc_prev->pscc_next = p->pscc_next;
5551 		if (p->pscc_next != NULL)
5552 			p->pscc_next->pscc_prev = p->pscc_prev;
5553 	}
5554 }
5555 
5556 /*
5557  * Stick the control struct specified on the front of the list
5558  */
5559 static void
5560 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5561 {
5562 	pscc_t *h;	/* entry at head of list */
5563 	if ((h = *list) == NULL) {
5564 		*list = p;
5565 		ASSERT(p->pscc_next == NULL);
5566 		ASSERT(p->pscc_prev == NULL);
5567 	} else {
5568 		p->pscc_next = h;
5569 		ASSERT(h->pscc_prev == NULL);
5570 		h->pscc_prev = p;
5571 		ASSERT(p->pscc_prev == NULL);
5572 		*list = p;
5573 	}
5574 }
5575 
5576 /*
5577  * If dip is NULL, process is closing "clone" clean up all its registrations.
5578  * Otherwise only clean up those for dip because process is just giving up
5579  * control of a direct device.
5580  */
5581 void
5582 pm_deregister_watcher(int clone, dev_info_t *dip)
5583 {
5584 	pscc_t	*p, *pn;
5585 	psce_t	*psce;
5586 	int found = 0;
5587 
5588 	if (dip == NULL) {
5589 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5590 		for (p = pm_pscc_interest; p; p = pn) {
5591 			pn = p->pscc_next;
5592 			if (p->pscc_clone == clone) {
5593 				pm_dequeue_pscc(p, &pm_pscc_interest);
5594 				psce = p->pscc_entries;
5595 				ASSERT(psce->psce_references == 1);
5596 				mutex_destroy(&psce->psce_lock);
5597 				kmem_free(psce->psce_first,
5598 				    sizeof (pm_state_change_t) * PSCCOUNT);
5599 				kmem_free(psce, sizeof (*psce));
5600 				kmem_free(p, sizeof (*p));
5601 			}
5602 		}
5603 		pm_interest[clone] = 0;
5604 		rw_exit(&pm_pscc_interest_rwlock);
5605 	}
5606 	found = 0;
5607 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5608 	for (p = pm_pscc_direct; p; p = pn) {
5609 		pn = p->pscc_next;
5610 		if ((dip && p->pscc_dip == dip) ||
5611 		    (dip == NULL && clone == p->pscc_clone)) {
5612 			ASSERT(clone == p->pscc_clone);
5613 			found++;
5614 			/*
5615 			 * Remove from control list
5616 			 */
5617 			pm_dequeue_pscc(p, &pm_pscc_direct);
5618 			/*
5619 			 * If we're the last reference, free the
5620 			 * entries struct.
5621 			 */
5622 			psce = p->pscc_entries;
5623 			ASSERT(psce);
5624 			if (psce->psce_references == 1) {
5625 				kmem_free(psce->psce_first,
5626 				    PSCCOUNT * sizeof (pm_state_change_t));
5627 				kmem_free(psce, sizeof (*psce));
5628 			} else {
5629 				psce->psce_references--;
5630 			}
5631 			kmem_free(p, sizeof (*p));
5632 		}
5633 	}
5634 	ASSERT(dip == NULL || found);
5635 	rw_exit(&pm_pscc_direct_rwlock);
5636 }
5637 
5638 /*
5639  * Search the indicated list for an entry that matches clone, and return a
5640  * pointer to it.  To be interesting, the entry must have something ready to
5641  * be passed up to the controlling process.
5642  * The returned entry will be locked upon return from this call.
5643  */
5644 static psce_t *
5645 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5646 {
5647 	pscc_t	*p;
5648 	psce_t	*psce;
5649 	rw_enter(lock, RW_READER);
5650 	for (p = *list; p; p = p->pscc_next) {
5651 		if (clone == p->pscc_clone) {
5652 			psce = p->pscc_entries;
5653 			mutex_enter(&psce->psce_lock);
5654 			if (psce->psce_out->size) {
5655 				rw_exit(lock);
5656 				return (psce);
5657 			} else {
5658 				mutex_exit(&psce->psce_lock);
5659 			}
5660 		}
5661 	}
5662 	rw_exit(lock);
5663 	return (NULL);
5664 }
5665 
5666 /*
5667  * Find an entry for a particular clone in the direct list.
5668  */
5669 psce_t *
5670 pm_psc_clone_to_direct(int clone)
5671 {
5672 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5673 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5674 	    &pm_pscc_direct_rwlock));
5675 }
5676 
5677 /*
5678  * Find an entry for a particular clone in the interest list.
5679  */
5680 psce_t *
5681 pm_psc_clone_to_interest(int clone)
5682 {
5683 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5684 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5685 	    &pm_pscc_interest_rwlock));
5686 }
5687 
5688 /*
5689  * Put the given entry at the head of the blocked list
5690  */
5691 void
5692 pm_enqueue_blocked(pm_rsvp_t *p)
5693 {
5694 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5695 	ASSERT(p->pr_next == NULL);
5696 	ASSERT(p->pr_prev == NULL);
5697 	if (pm_blocked_list != NULL) {
5698 		p->pr_next = pm_blocked_list;
5699 		ASSERT(pm_blocked_list->pr_prev == NULL);
5700 		pm_blocked_list->pr_prev = p;
5701 		pm_blocked_list = p;
5702 	} else {
5703 		pm_blocked_list = p;
5704 	}
5705 }
5706 
5707 /*
5708  * Sets every power managed device back to its default threshold
5709  */
5710 void
5711 pm_all_to_default_thresholds(void)
5712 {
5713 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5714 	    (void *) &pm_system_idle_threshold);
5715 }
5716 
5717 static int
5718 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5719 {
5720 	int thr = (int)(*(int *)arg);
5721 
5722 	if (!PM_GET_PM_INFO(dip))
5723 		return (DDI_WALK_CONTINUE);
5724 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5725 	return (DDI_WALK_CONTINUE);
5726 }
5727 
5728 /*
5729  * Returns the current threshold value (in seconds) for the indicated component
5730  */
5731 int
5732 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5733 {
5734 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5735 		return (DDI_FAILURE);
5736 	} else {
5737 		*threshp = cur_threshold(dip, comp);
5738 		return (DDI_SUCCESS);
5739 	}
5740 }
5741 
5742 /*
5743  * To be called when changing the power level of a component of a device.
5744  * On some platforms, changing power on one device may require that power
5745  * be changed on other, related devices in the same transaction.  Thus, we
5746  * always pass this request to the platform power manager so that all the
5747  * affected devices will be locked.
5748  */
5749 void
5750 pm_lock_power(dev_info_t *dip, int *circp)
5751 {
5752 	power_req_t power_req;
5753 	int result;
5754 
5755 	power_req.request_type = PMR_PPM_LOCK_POWER;
5756 	power_req.req.ppm_lock_power_req.who = dip;
5757 	power_req.req.ppm_lock_power_req.circp = circp;
5758 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5759 }
5760 
5761 /*
5762  * Release the lock (or locks) acquired to change the power of a device.
5763  * See comments for pm_lock_power.
5764  */
5765 void
5766 pm_unlock_power(dev_info_t *dip, int circ)
5767 {
5768 	power_req_t power_req;
5769 	int result;
5770 
5771 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5772 	power_req.req.ppm_unlock_power_req.who = dip;
5773 	power_req.req.ppm_unlock_power_req.circ = circ;
5774 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5775 }
5776 
5777 
5778 /*
5779  * Attempt (without blocking) to acquire the lock(s) needed to change the
5780  * power of a component of a device.  See comments for pm_lock_power.
5781  *
5782  * Return: 1 if lock(s) acquired, 0 if not.
5783  */
5784 int
5785 pm_try_locking_power(dev_info_t *dip, int *circp)
5786 {
5787 	power_req_t power_req;
5788 	int result;
5789 
5790 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5791 	power_req.req.ppm_lock_power_req.who = dip;
5792 	power_req.req.ppm_lock_power_req.circp = circp;
5793 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5794 	return (result);
5795 }
5796 
5797 
5798 /*
5799  * Lock power state of a device.
5800  *
5801  * The implementation handles a special case where another thread may have
5802  * acquired the lock and created/launched this thread to do the work.  If
5803  * the lock cannot be acquired immediately, we check to see if this thread
5804  * is registered as a borrower of the lock.  If so, we may proceed without
5805  * the lock.  This assumes that the lending thread blocks on the completion
5806  * of this thread.
5807  *
5808  * Note 1: for use by ppm only.
5809  *
5810  * Note 2: On failing to get the lock immediately, we search lock_loan list
5811  * for curthread (as borrower of the lock).  On a hit, we check that the
5812  * lending thread already owns the lock we want.  It is safe to compare
5813  * devi_busy_thread and thread id of the lender because in the == case (the
5814  * only one we care about) we know that the owner is blocked.  Similarly,
5815  * If we find that curthread isn't registered as a lock borrower, it is safe
5816  * to use the blocking call (ndi_devi_enter) because we know that if we
5817  * weren't already listed as a borrower (upstream on the call stack) we won't
5818  * become one.
5819  */
5820 void
5821 pm_lock_power_single(dev_info_t *dip, int *circp)
5822 {
5823 	lock_loan_t *cur;
5824 
5825 	/* if the lock is available, we are done. */
5826 	if (ndi_devi_tryenter(dip, circp))
5827 		return;
5828 
5829 	mutex_enter(&pm_loan_lock);
5830 	/* see if our thread is registered as a lock borrower. */
5831 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5832 		if (cur->pmlk_borrower == curthread)
5833 			break;
5834 	mutex_exit(&pm_loan_lock);
5835 
5836 	/* if this thread not already registered, it is safe to block */
5837 	if (cur == NULL)
5838 		ndi_devi_enter(dip, circp);
5839 	else {
5840 		/* registered: does lender own the lock we want? */
5841 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5842 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5843 			cur->pmlk_dip = dip;
5844 		} else /* no: just block for it */
5845 			ndi_devi_enter(dip, circp);
5846 
5847 	}
5848 }
5849 
5850 /*
5851  * Drop the lock on the device's power state.  See comment for
5852  * pm_lock_power_single() for special implementation considerations.
5853  *
5854  * Note: for use by ppm only.
5855  */
5856 void
5857 pm_unlock_power_single(dev_info_t *dip, int circ)
5858 {
5859 	lock_loan_t *cur;
5860 
5861 	/* optimization: mutex not needed to check empty list */
5862 	if (lock_loan_head.pmlk_next == NULL) {
5863 		ndi_devi_exit(dip, circ);
5864 		return;
5865 	}
5866 
5867 	mutex_enter(&pm_loan_lock);
5868 	/* see if our thread is registered as a lock borrower. */
5869 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5870 		if (cur->pmlk_borrower == curthread)
5871 			break;
5872 	mutex_exit(&pm_loan_lock);
5873 
5874 	if (cur == NULL || cur->pmlk_dip != dip)
5875 		/* we acquired the lock directly, so return it */
5876 		ndi_devi_exit(dip, circ);
5877 }
5878 
5879 /*
5880  * Try to take the lock for changing the power level of a component.
5881  *
5882  * Note: for use by ppm only.
5883  */
5884 int
5885 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5886 {
5887 	return (ndi_devi_tryenter(dip, circp));
5888 }
5889 
5890 #ifdef	DEBUG
5891 /*
5892  * The following are used only to print out data structures for debugging
5893  */
5894 void
5895 prdeps(char *msg)
5896 {
5897 
5898 	pm_pdr_t *rp;
5899 	int i;
5900 
5901 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5902 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5903 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5904 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5905 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5906 		    (void *)rp->pdr_next);
5907 		if (rp->pdr_kept_count != 0) {
5908 			pm_log("kept list = ");
5909 			i = 0;
5910 			while (i < rp->pdr_kept_count) {
5911 				pm_log("%s ", rp->pdr_kept_paths[i]);
5912 				i++;
5913 			}
5914 			pm_log("\n");
5915 		}
5916 	}
5917 }
5918 
5919 void
5920 pr_noinvol(char *hdr)
5921 {
5922 	pm_noinvol_t *ip;
5923 
5924 	pm_log("%s\n", hdr);
5925 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5926 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5927 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5928 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5929 	rw_exit(&pm_noinvol_rwlock);
5930 }
5931 #endif
5932 
5933 /*
5934  * Attempt to apply the thresholds indicated by rp to the node specified by
5935  * dip.
5936  */
5937 void
5938 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5939 {
5940 	PMD_FUNC(pmf, "apply_recorded_thresh")
5941 	int i, j;
5942 	int comps = PM_NUMCMPTS(dip);
5943 	struct pm_component *cp;
5944 	pm_pte_t *ep;
5945 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5946 
5947 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5948 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5949 	PM_LOCK_DIP(dip);
5950 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5951 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5952 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
5953 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5954 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5955 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5956 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5957 		PM_UNLOCK_DIP(dip);
5958 		return;
5959 	}
5960 
5961 	ep = rp->ptr_entries;
5962 	/*
5963 	 * Here we do the special case of a device threshold
5964 	 */
5965 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
5966 		ASSERT(ep && ep->pte_numthresh == 1);
5967 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
5968 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
5969 		PM_UNLOCK_DIP(dip);
5970 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
5971 		if (PM_SCANABLE(dip))
5972 			pm_rescan(dip);
5973 		return;
5974 	}
5975 	for (i = 0; i < comps; i++) {
5976 		cp = PM_CP(dip, i);
5977 		for (j = 0; j < ep->pte_numthresh; j++) {
5978 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
5979 			    "to %x\n", pmf, j, PM_DEVICE(dip),
5980 			    i, ep->pte_thresh[j]))
5981 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
5982 		}
5983 		ep++;
5984 	}
5985 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
5986 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
5987 	PM_UNLOCK_DIP(dip);
5988 
5989 	if (PM_SCANABLE(dip))
5990 		pm_rescan(dip);
5991 }
5992 
5993 /*
5994  * Returns true if the threshold specified by rp could be applied to dip
5995  * (that is, the number of components and transitions are the same)
5996  */
5997 int
5998 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5999 {
6000 	PMD_FUNC(pmf, "valid_thresh")
6001 	int comps, i;
6002 	pm_component_t *cp;
6003 	pm_pte_t *ep;
6004 
6005 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6006 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6007 		    rp->ptr_physpath))
6008 		return (0);
6009 	}
6010 	/*
6011 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6012 	 * an entry with numcomps == 0, (since we don't know how many
6013 	 * components there are in advance).  This is always a valid
6014 	 * spec.
6015 	 */
6016 	if (rp->ptr_numcomps == 0) {
6017 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6018 		return (1);
6019 	}
6020 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6021 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6022 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6023 		return (0);
6024 	}
6025 	ep = rp->ptr_entries;
6026 	for (i = 0; i < comps; i++) {
6027 		cp = PM_CP(dip, i);
6028 		if ((ep + i)->pte_numthresh !=
6029 		    cp->pmc_comp.pmc_numlevels - 1) {
6030 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6031 			    pmf, rp->ptr_physpath, i,
6032 			    cp->pmc_comp.pmc_numlevels - 1,
6033 			    (ep + i)->pte_numthresh))
6034 			return (0);
6035 		}
6036 	}
6037 	return (1);
6038 }
6039 
6040 /*
6041  * Remove any recorded threshold for device physpath
6042  * We know there will be at most one.
6043  */
6044 void
6045 pm_unrecord_threshold(char *physpath)
6046 {
6047 	pm_thresh_rec_t *pptr, *ptr;
6048 
6049 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6050 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6051 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6052 			if (pptr) {
6053 				pptr->ptr_next = ptr->ptr_next;
6054 			} else {
6055 				ASSERT(pm_thresh_head == ptr);
6056 				pm_thresh_head = ptr->ptr_next;
6057 			}
6058 			kmem_free(ptr, ptr->ptr_size);
6059 			break;
6060 		}
6061 		pptr = ptr;
6062 	}
6063 	rw_exit(&pm_thresh_rwlock);
6064 }
6065 
6066 /*
6067  * Discard all recorded thresholds.  We are returning to the default pm state.
6068  */
6069 void
6070 pm_discard_thresholds(void)
6071 {
6072 	pm_thresh_rec_t *rp;
6073 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6074 	while (pm_thresh_head) {
6075 		rp = pm_thresh_head;
6076 		pm_thresh_head = rp->ptr_next;
6077 		kmem_free(rp, rp->ptr_size);
6078 	}
6079 	rw_exit(&pm_thresh_rwlock);
6080 }
6081 
6082 /*
6083  * Discard all recorded dependencies.  We are returning to the default pm state.
6084  */
6085 void
6086 pm_discard_dependencies(void)
6087 {
6088 	pm_pdr_t *rp;
6089 	int i;
6090 	size_t length;
6091 
6092 #ifdef DEBUG
6093 	if (pm_debug & PMD_DEP)
6094 		prdeps("Before discard\n");
6095 #endif
6096 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6097 
6098 #ifdef DEBUG
6099 	if (pm_debug & PMD_DEP)
6100 		prdeps("After discard\n");
6101 #endif
6102 	while (pm_dep_head) {
6103 		rp = pm_dep_head;
6104 		if (!rp->pdr_isprop) {
6105 			ASSERT(rp->pdr_satisfied == 0);
6106 			ASSERT(pm_unresolved_deps);
6107 			pm_unresolved_deps--;
6108 		} else {
6109 			ASSERT(pm_prop_deps);
6110 			pm_prop_deps--;
6111 		}
6112 		pm_dep_head = rp->pdr_next;
6113 		if (rp->pdr_kept_count)  {
6114 			for (i = 0; i < rp->pdr_kept_count; i++) {
6115 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6116 				kmem_free(rp->pdr_kept_paths[i], length);
6117 			}
6118 			kmem_free(rp->pdr_kept_paths,
6119 				rp->pdr_kept_count * sizeof (char **));
6120 		}
6121 		kmem_free(rp, rp->pdr_size);
6122 	}
6123 }
6124 
6125 
6126 static int
6127 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6128 {
6129 	_NOTE(ARGUNUSED(arg))
6130 	char *pathbuf;
6131 
6132 	if (PM_GET_PM_INFO(dip) == NULL)
6133 		return (DDI_WALK_CONTINUE);
6134 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6135 	(void) ddi_pathname(dip, pathbuf);
6136 	pm_free_keeper(pathbuf, 0);
6137 	kmem_free(pathbuf, MAXPATHLEN);
6138 	return (DDI_WALK_CONTINUE);
6139 }
6140 
6141 static int
6142 pm_kept_walk(dev_info_t *dip, void *arg)
6143 {
6144 	_NOTE(ARGUNUSED(arg))
6145 	char *pathbuf;
6146 
6147 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6148 	(void) ddi_pathname(dip, pathbuf);
6149 	(void) pm_kept(pathbuf);
6150 	kmem_free(pathbuf, MAXPATHLEN);
6151 
6152 	return (DDI_WALK_CONTINUE);
6153 }
6154 
6155 static int
6156 pm_keeper_walk(dev_info_t *dip, void *arg)
6157 {
6158 	_NOTE(ARGUNUSED(arg))
6159 	char *pathbuf;
6160 
6161 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6162 	(void) ddi_pathname(dip, pathbuf);
6163 	(void) pm_keeper(pathbuf);
6164 	kmem_free(pathbuf, MAXPATHLEN);
6165 
6166 	return (DDI_WALK_CONTINUE);
6167 }
6168 
6169 static char *
6170 pdw_type_decode(int type)
6171 {
6172 	switch (type) {
6173 	case PM_DEP_WK_POWER_ON:
6174 		return ("power on");
6175 	case PM_DEP_WK_POWER_OFF:
6176 		return ("power off");
6177 	case PM_DEP_WK_DETACH:
6178 		return ("detach");
6179 	case PM_DEP_WK_REMOVE_DEP:
6180 		return ("remove dep");
6181 	case PM_DEP_WK_BRINGUP_SELF:
6182 		return ("bringup self");
6183 	case PM_DEP_WK_RECORD_KEEPER:
6184 		return ("add dependent");
6185 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6186 		return ("add dependent property");
6187 	case PM_DEP_WK_KEPT:
6188 		return ("kept");
6189 	case PM_DEP_WK_KEEPER:
6190 		return ("keeper");
6191 	case PM_DEP_WK_ATTACH:
6192 		return ("attach");
6193 	case PM_DEP_WK_CHECK_KEPT:
6194 		return ("check kept");
6195 	case PM_DEP_WK_CPR_SUSPEND:
6196 		return ("suspend");
6197 	case PM_DEP_WK_CPR_RESUME:
6198 		return ("resume");
6199 	default:
6200 		return ("unknown");
6201 	}
6202 
6203 }
6204 
6205 static void
6206 pm_rele_dep(char *keeper)
6207 {
6208 	PMD_FUNC(pmf, "rele_dep")
6209 	pm_pdr_t *dp;
6210 	char *kept_path = NULL;
6211 	dev_info_t *kept = NULL;
6212 	int count = 0;
6213 
6214 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6215 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6216 			continue;
6217 		for (count = 0; count < dp->pdr_kept_count; count++) {
6218 			kept_path = dp->pdr_kept_paths[count];
6219 			if (kept_path == NULL)
6220 				continue;
6221 			kept = pm_name_to_dip(kept_path, 1);
6222 			if (kept) {
6223 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6224 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6225 				    keeper))
6226 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6227 				pm_rele_power(kept);
6228 				ddi_release_devi(kept);
6229 			}
6230 		}
6231 	}
6232 }
6233 
6234 /*
6235  * Called when we are just released from direct PM.  Bring ourself up
6236  * if our keeper is up since dependency is not honored while a kept
6237  * device is under direct PM.
6238  */
6239 static void
6240 pm_bring_self_up(char *keptpath)
6241 {
6242 	PMD_FUNC(pmf, "bring_self_up")
6243 	dev_info_t *kept;
6244 	dev_info_t *keeper;
6245 	pm_pdr_t *dp;
6246 	int i, j;
6247 	int up = 0, circ;
6248 
6249 	kept = pm_name_to_dip(keptpath, 1);
6250 	if (kept == NULL)
6251 		return;
6252 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6253 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6254 		if (dp->pdr_kept_count == 0)
6255 			continue;
6256 		for (i = 0; i < dp->pdr_kept_count; i++) {
6257 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6258 				continue;
6259 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6260 			if (keeper) {
6261 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6262 				    pmf, PM_DEVICE(keeper)))
6263 				PM_LOCK_POWER(keeper, &circ);
6264 				for (j = 0; j < PM_NUMCMPTS(keeper);
6265 				    j++) {
6266 					if (PM_CURPOWER(keeper, j)) {
6267 						PMD(PMD_KEEPS, ("%s: comp="
6268 						    "%d is up\n", pmf, j))
6269 						up++;
6270 					}
6271 				}
6272 				if (up) {
6273 					if (PM_SKBU(kept))
6274 						DEVI(kept)->devi_pm_flags &=
6275 						    ~PMC_SKIP_BRINGUP;
6276 					bring_pmdep_up(kept, 1);
6277 				}
6278 				PM_UNLOCK_POWER(keeper, circ);
6279 				ddi_release_devi(keeper);
6280 			}
6281 		}
6282 	}
6283 	ddi_release_devi(kept);
6284 }
6285 
6286 static void
6287 pm_process_dep_request(pm_dep_wk_t *work)
6288 {
6289 	PMD_FUNC(pmf, "dep_req")
6290 	int ret;
6291 
6292 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6293 	    pdw_type_decode(work->pdw_type)))
6294 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6295 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6296 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6297 
6298 	switch (work->pdw_type) {
6299 	case PM_DEP_WK_POWER_ON:
6300 		/* Bring up the kept devices and put a hold on them */
6301 		bring_wekeeps_up(work->pdw_keeper);
6302 		break;
6303 	case PM_DEP_WK_POWER_OFF:
6304 		/* Release the kept devices */
6305 		pm_rele_dep(work->pdw_keeper);
6306 		break;
6307 	case PM_DEP_WK_DETACH:
6308 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6309 		break;
6310 	case PM_DEP_WK_REMOVE_DEP:
6311 		pm_discard_dependencies();
6312 		break;
6313 	case PM_DEP_WK_BRINGUP_SELF:
6314 		/*
6315 		 * We deferred satisfying our dependency till now, so satisfy
6316 		 * it again and bring ourselves up.
6317 		 */
6318 		pm_bring_self_up(work->pdw_kept);
6319 		break;
6320 	case PM_DEP_WK_RECORD_KEEPER:
6321 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6322 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6323 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6324 		break;
6325 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6326 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6327 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6328 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6329 		break;
6330 	case PM_DEP_WK_KEPT:
6331 		ret = pm_kept(work->pdw_kept);
6332 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6333 		    ret))
6334 		break;
6335 	case PM_DEP_WK_KEEPER:
6336 		ret = pm_keeper(work->pdw_keeper);
6337 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6338 		    pmf, ret))
6339 		break;
6340 	case PM_DEP_WK_ATTACH:
6341 		ret = pm_keeper(work->pdw_keeper);
6342 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6343 		    pmf, ret))
6344 		ret = pm_kept(work->pdw_kept);
6345 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6346 		    pmf, ret))
6347 		break;
6348 	case PM_DEP_WK_CHECK_KEPT:
6349 		ret = pm_is_kept(work->pdw_kept);
6350 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6351 		    pmf, work->pdw_kept, ret))
6352 		break;
6353 	case PM_DEP_WK_CPR_SUSPEND:
6354 		pm_discard_dependencies();
6355 		break;
6356 	case PM_DEP_WK_CPR_RESUME:
6357 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6358 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6359 		break;
6360 	default:
6361 		ASSERT(0);
6362 		break;
6363 	}
6364 	/*
6365 	 * Free the work structure if the requester is not waiting
6366 	 * Otherwise it is the requester's responsiblity to free it.
6367 	 */
6368 	if (!work->pdw_wait) {
6369 		if (work->pdw_keeper)
6370 			kmem_free(work->pdw_keeper,
6371 			    strlen(work->pdw_keeper) + 1);
6372 		if (work->pdw_kept)
6373 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6374 		kmem_free(work, sizeof (pm_dep_wk_t));
6375 	} else {
6376 		/*
6377 		 * Notify requester if it is waiting for it.
6378 		 */
6379 		work->pdw_ret = ret;
6380 		work->pdw_done = 1;
6381 		cv_signal(&work->pdw_cv);
6382 	}
6383 }
6384 
6385 /*
6386  * Process PM dependency requests.
6387  */
6388 static void
6389 pm_dep_thread(void)
6390 {
6391 	pm_dep_wk_t *work;
6392 	callb_cpr_t cprinfo;
6393 
6394 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6395 	    "pm_dep_thread");
6396 	for (;;) {
6397 		mutex_enter(&pm_dep_thread_lock);
6398 		if (pm_dep_thread_workq == NULL) {
6399 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6400 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6401 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6402 		}
6403 		work = pm_dep_thread_workq;
6404 		pm_dep_thread_workq = work->pdw_next;
6405 		if (pm_dep_thread_tail == work)
6406 			pm_dep_thread_tail = work->pdw_next;
6407 		mutex_exit(&pm_dep_thread_lock);
6408 		pm_process_dep_request(work);
6409 
6410 	}
6411 	/*NOTREACHED*/
6412 }
6413 
6414 /*
6415  * Set the power level of the indicated device to unknown (if it is not a
6416  * backwards compatible device), as it has just been resumed, and it won't
6417  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6418  */
6419 void
6420 pm_forget_power_level(dev_info_t *dip)
6421 {
6422 	dev_info_t *pdip = ddi_get_parent(dip);
6423 	int i, count = 0;
6424 
6425 	if (!PM_ISBC(dip)) {
6426 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6427 			count += (PM_CURPOWER(dip, i) == 0);
6428 
6429 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6430 			e_pm_hold_rele_power(pdip, count);
6431 
6432 		/*
6433 		 * Count this as a power cycle if we care
6434 		 */
6435 		if (DEVI(dip)->devi_pm_volpmd &&
6436 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6437 			DEVI(dip)->devi_pm_volpmd = 0;
6438 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6439 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6440 	}
6441 }
6442 
6443 /*
6444  * This function advises the caller whether it should make a power-off
6445  * transition at this time or not.  If the transition is not advised
6446  * at this time, the time that the next power-off transition can
6447  * be made from now is returned through "intervalp" pointer.
6448  * This function returns:
6449  *
6450  *  1  power-off advised
6451  *  0  power-off not advised, intervalp will point to seconds from
6452  *	  now that a power-off is advised.  If it is passed the number
6453  *	  of years that policy specifies the device should last,
6454  *	  a large number is returned as the time interval.
6455  *  -1  error
6456  */
6457 int
6458 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6459 {
6460 	PMD_FUNC(pmf, "pm_trans_check")
6461 	char dbuf[DC_SCSI_MFR_LEN];
6462 	struct pm_scsi_cycles *scp;
6463 	int service_years, service_weeks, full_years;
6464 	time_t now, service_seconds, tdiff;
6465 	time_t within_year, when_allowed;
6466 	char *ptr;
6467 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6468 	int cycles_diff, cycles_over;
6469 
6470 	if (datap == NULL) {
6471 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6472 		return (-1);
6473 	}
6474 
6475 	if (datap->format == DC_SCSI_FORMAT) {
6476 		/*
6477 		 * Power cycles of the scsi drives are distributed
6478 		 * over 5 years with the following percentage ratio:
6479 		 *
6480 		 *	30%, 25%, 20%, 15%, and 10%
6481 		 *
6482 		 * The power cycle quota for each year is distributed
6483 		 * linearly through out the year.  The equation for
6484 		 * determining the expected cycles is:
6485 		 *
6486 		 *	e = a * (n / y)
6487 		 *
6488 		 * e = expected cycles
6489 		 * a = allocated cycles for this year
6490 		 * n = number of seconds since beginning of this year
6491 		 * y = number of seconds in a year
6492 		 *
6493 		 * Note that beginning of the year starts the day that
6494 		 * the drive has been put on service.
6495 		 *
6496 		 * If the drive has passed its expected cycles, we
6497 		 * can determine when it can start to power cycle
6498 		 * again to keep it on track to meet the 5-year
6499 		 * life expectancy.  The equation for determining
6500 		 * when to power cycle is:
6501 		 *
6502 		 *	w = y * (c / a)
6503 		 *
6504 		 * w = when it can power cycle again
6505 		 * y = number of seconds in a year
6506 		 * c = current number of cycles
6507 		 * a = allocated cycles for the year
6508 		 *
6509 		 */
6510 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6511 
6512 		scp = &datap->un.scsi_cycles;
6513 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6514 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6515 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6516 		if (scp->ncycles < 0 || scp->flag != 0) {
6517 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6518 			return (-1);
6519 		}
6520 
6521 		if (scp->ncycles > scp->lifemax) {
6522 			*intervalp = (LONG_MAX / hz);
6523 			return (0);
6524 		}
6525 
6526 		/*
6527 		 * convert service date to time_t
6528 		 */
6529 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6530 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6531 		ptr = dbuf;
6532 		service_years = stoi(&ptr) - EPOCH_YEAR;
6533 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6534 		    DC_SCSI_WEEK_LEN);
6535 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6536 
6537 		/*
6538 		 * scsi standard does not specify WW data,
6539 		 * could be (00-51) or (01-52)
6540 		 */
6541 		ptr = dbuf;
6542 		service_weeks = stoi(&ptr);
6543 		if (service_years < 0 ||
6544 		    service_weeks < 0 || service_weeks > 52) {
6545 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6546 			    pmf, service_years, service_weeks))
6547 			return (-1);
6548 		}
6549 
6550 		/*
6551 		 * calculate service date in seconds-since-epoch,
6552 		 * adding one day for each leap-year.
6553 		 *
6554 		 * (years-since-epoch + 2) fixes integer truncation,
6555 		 * example: (8) leap-years during [1972, 2000]
6556 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6557 		 */
6558 		service_seconds = (service_years * DC_SPY) +
6559 		    (service_weeks * DC_SPW) +
6560 		    (((service_years + 2) / 4) * DC_SPD);
6561 
6562 		now = gethrestime_sec();
6563 		/*
6564 		 * since the granularity of 'svc_date' is day not second,
6565 		 * 'now' should be rounded up to full day.
6566 		 */
6567 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6568 		if (service_seconds > now) {
6569 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6570 			    "than now (%ld)!\n", pmf, service_seconds, now))
6571 			return (-1);
6572 		}
6573 
6574 		tdiff = now - service_seconds;
6575 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6576 
6577 		/*
6578 		 * NOTE - Leap years are not considered in the calculations
6579 		 * below.
6580 		 */
6581 		full_years = (tdiff / DC_SPY);
6582 		if ((full_years >= DC_SCSI_NPY) &&
6583 		    (scp->ncycles <= scp->lifemax))
6584 			return (1);
6585 
6586 		/*
6587 		 * Determine what is the normal cycle usage for the
6588 		 * device at the beginning and the end of this year.
6589 		 */
6590 		lower_bound_cycles = (!full_years) ? 0 :
6591 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6592 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6593 
6594 		if (scp->ncycles <= lower_bound_cycles)
6595 			return (1);
6596 
6597 		/*
6598 		 * The linear slope that determines how many cycles
6599 		 * are allowed this year is number of seconds
6600 		 * passed this year over total number of seconds in a year.
6601 		 */
6602 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6603 		within_year = (tdiff % DC_SPY);
6604 		cycles_allowed = lower_bound_cycles +
6605 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6606 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6607 		    full_years, within_year))
6608 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6609 		    cycles_allowed))
6610 
6611 		if (scp->ncycles <= cycles_allowed)
6612 			return (1);
6613 
6614 		/*
6615 		 * The transition is not advised now but we can
6616 		 * determine when the next transition can be made.
6617 		 *
6618 		 * Depending on how many cycles the device has been
6619 		 * over-used, we may need to skip years with
6620 		 * different percentage quota in order to determine
6621 		 * when the next transition can be made.
6622 		 */
6623 		cycles_over = (scp->ncycles - lower_bound_cycles);
6624 		while (cycles_over > cycles_diff) {
6625 			full_years++;
6626 			if (full_years >= DC_SCSI_NPY) {
6627 				*intervalp = (LONG_MAX / hz);
6628 				return (0);
6629 			}
6630 			cycles_over -= cycles_diff;
6631 			lower_bound_cycles = upper_bound_cycles;
6632 			upper_bound_cycles =
6633 			    (scp->lifemax * pcnt[full_years]) / 100;
6634 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6635 		}
6636 
6637 		/*
6638 		 * The linear slope that determines when the next transition
6639 		 * can be made is the relative position of used cycles within a
6640 		 * year over total number of cycles within that year.
6641 		 */
6642 		when_allowed = service_seconds + (full_years * DC_SPY) +
6643 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6644 		*intervalp = (when_allowed - now);
6645 		if (*intervalp > (LONG_MAX / hz))
6646 			*intervalp = (LONG_MAX / hz);
6647 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6648 		    *intervalp))
6649 		return (0);
6650 	}
6651 
6652 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6653 	return (-1);
6654 }
6655 
6656 /*
6657  * Nexus drivers call into pm framework to indicate which child driver is about
6658  * to be installed.  In some platforms, ppm may need to configure the hardware
6659  * for successful installation of a driver.
6660  */
6661 int
6662 pm_init_child(dev_info_t *dip)
6663 {
6664 	power_req_t power_req;
6665 
6666 	ASSERT(ddi_binding_name(dip));
6667 	ASSERT(ddi_get_name_addr(dip));
6668 	pm_ppm_claim(dip);
6669 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6670 		power_req.request_type = PMR_PPM_INIT_CHILD;
6671 		power_req.req.ppm_config_req.who = dip;
6672 		ASSERT(PPM(dip) != NULL);
6673 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6674 		    NULL));
6675 	} else {
6676 #ifdef DEBUG
6677 		/* pass it to the default handler so we can debug things */
6678 		power_req.request_type = PMR_PPM_INIT_CHILD;
6679 		power_req.req.ppm_config_req.who = dip;
6680 		(void) pm_ctlops(NULL, dip,
6681 		    DDI_CTLOPS_POWER, &power_req, NULL);
6682 #endif
6683 	}
6684 	return (DDI_SUCCESS);
6685 }
6686 
6687 /*
6688  * Bring parent of a node that is about to be probed up to full power, and
6689  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6690  * it is time to let it go down again
6691  */
6692 void
6693 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6694 {
6695 	int result;
6696 	power_req_t power_req;
6697 
6698 	bzero(cp, sizeof (*cp));
6699 	cp->ppc_dip = dip;
6700 
6701 	pm_ppm_claim(dip);
6702 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6703 		power_req.request_type = PMR_PPM_PRE_PROBE;
6704 		power_req.req.ppm_config_req.who = dip;
6705 		ASSERT(PPM(dip) != NULL);
6706 		(void) pm_ctlops(PPM(dip), dip,
6707 		    DDI_CTLOPS_POWER, &power_req, &result);
6708 		cp->ppc_ppm = PPM(dip);
6709 	} else {
6710 #ifdef DEBUG
6711 		/* pass it to the default handler so we can debug things */
6712 		power_req.request_type = PMR_PPM_PRE_PROBE;
6713 		power_req.req.ppm_config_req.who = dip;
6714 		(void) pm_ctlops(NULL, dip,
6715 		    DDI_CTLOPS_POWER, &power_req, &result);
6716 #endif
6717 		cp->ppc_ppm = NULL;
6718 	}
6719 }
6720 
6721 int
6722 pm_pre_config(dev_info_t *dip, char *devnm)
6723 {
6724 	PMD_FUNC(pmf, "pre_config")
6725 	int ret;
6726 
6727 	if (MDI_VHCI(dip)) {
6728 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6729 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6730 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6731 	} else if (!PM_GET_PM_INFO(dip))
6732 		return (DDI_SUCCESS);
6733 
6734 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6735 	pm_hold_power(dip);
6736 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6737 	if (ret != DDI_SUCCESS)
6738 		pm_rele_power(dip);
6739 	return (ret);
6740 }
6741 
6742 /*
6743  * This routine is called by devfs during its walk to unconfigue a node.
6744  * If the call is due to auto mod_unloads and the dip is not at its
6745  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6746  * return DDI_SUCCESS.
6747  */
6748 int
6749 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6750 {
6751 	PMD_FUNC(pmf, "pre_unconfig")
6752 	int ret;
6753 
6754 	if (MDI_VHCI(dip)) {
6755 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6756 		    PM_DEVICE(dip), flags))
6757 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6758 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6759 	} else if (!PM_GET_PM_INFO(dip))
6760 		return (DDI_SUCCESS);
6761 
6762 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6763 	    flags))
6764 	*held = 0;
6765 
6766 	/*
6767 	 * If the dip is a leaf node, don't power it up.
6768 	 */
6769 	if (!ddi_get_child(dip))
6770 		return (DDI_SUCCESS);
6771 
6772 	/*
6773 	 * Do not power up the node if it is called due to auto-modunload.
6774 	 */
6775 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6776 		return (DDI_FAILURE);
6777 
6778 	pm_hold_power(dip);
6779 	*held = 1;
6780 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6781 	if (ret != DDI_SUCCESS) {
6782 		pm_rele_power(dip);
6783 		*held = 0;
6784 	}
6785 	return (ret);
6786 }
6787 
6788 /*
6789  * Notify ppm of attach action.  Parent is already held at full power by
6790  * probe action.
6791  */
6792 void
6793 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6794 {
6795 	static char *me = "pm_pre_attach";
6796 	power_req_t power_req;
6797 	int result;
6798 
6799 	/*
6800 	 * Initialize and fill in the PPM cookie
6801 	 */
6802 	bzero(cp, sizeof (*cp));
6803 	cp->ppc_cmd = (int)cmd;
6804 	cp->ppc_ppm = PPM(dip);
6805 	cp->ppc_dip = dip;
6806 
6807 	/*
6808 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6809 	 * Power Management stuff. DDI_RESUME also has to purge it's
6810 	 * powerlevel information.
6811 	 */
6812 	switch (cmd) {
6813 	case DDI_ATTACH:
6814 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6815 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6816 			power_req.req.ppm_config_req.who = dip;
6817 			ASSERT(PPM(dip));
6818 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6819 			    &power_req, &result);
6820 		}
6821 #ifdef DEBUG
6822 		else {
6823 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6824 			power_req.req.ppm_config_req.who = dip;
6825 			(void) pm_ctlops(NULL, dip,
6826 			    DDI_CTLOPS_POWER, &power_req, &result);
6827 		}
6828 #endif
6829 		break;
6830 	case DDI_RESUME:
6831 		pm_forget_power_level(dip);
6832 
6833 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6834 			power_req.request_type = PMR_PPM_PRE_RESUME;
6835 			power_req.req.resume_req.who = cp->ppc_dip;
6836 			power_req.req.resume_req.cmd =
6837 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6838 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6839 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6840 			    DDI_CTLOPS_POWER, &power_req, &result);
6841 		}
6842 #ifdef DEBUG
6843 		else {
6844 			power_req.request_type = PMR_PPM_PRE_RESUME;
6845 			power_req.req.resume_req.who = cp->ppc_dip;
6846 			power_req.req.resume_req.cmd =
6847 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6848 			(void) pm_ctlops(NULL, cp->ppc_dip,
6849 			    DDI_CTLOPS_POWER, &power_req, &result);
6850 		}
6851 #endif
6852 		break;
6853 
6854 	case DDI_PM_RESUME:
6855 		break;
6856 
6857 	default:
6858 		panic(me);
6859 	}
6860 }
6861 
6862 /*
6863  * Nexus drivers call into pm framework to indicate which child driver is
6864  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6865  * hardware since the device driver is no longer installed.
6866  */
6867 int
6868 pm_uninit_child(dev_info_t *dip)
6869 {
6870 	power_req_t power_req;
6871 
6872 	ASSERT(ddi_binding_name(dip));
6873 	ASSERT(ddi_get_name_addr(dip));
6874 	pm_ppm_claim(dip);
6875 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6876 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6877 		power_req.req.ppm_config_req.who = dip;
6878 		ASSERT(PPM(dip));
6879 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6880 		    NULL));
6881 	} else {
6882 #ifdef DEBUG
6883 		/* pass it to the default handler so we can debug things */
6884 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6885 		power_req.req.ppm_config_req.who = dip;
6886 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6887 #endif
6888 	}
6889 	return (DDI_SUCCESS);
6890 }
6891 /*
6892  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6893  * Also notify ppm of result of probe if there is a ppm that cares
6894  */
6895 void
6896 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6897 {
6898 	_NOTE(ARGUNUSED(probe_failed))
6899 	int result;
6900 	power_req_t power_req;
6901 
6902 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6903 		power_req.request_type = PMR_PPM_POST_PROBE;
6904 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6905 		power_req.req.ppm_config_req.result = ret;
6906 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6907 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6908 		    &power_req, &result);
6909 	}
6910 #ifdef DEBUG
6911 	else {
6912 		power_req.request_type = PMR_PPM_POST_PROBE;
6913 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6914 		power_req.req.ppm_config_req.result = ret;
6915 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6916 		    &power_req, &result);
6917 	}
6918 #endif
6919 }
6920 
6921 void
6922 pm_post_config(dev_info_t *dip, char *devnm)
6923 {
6924 	PMD_FUNC(pmf, "post_config")
6925 
6926 	if (MDI_VHCI(dip)) {
6927 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6928 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6929 		return;
6930 	} else if (!PM_GET_PM_INFO(dip))
6931 		return;
6932 
6933 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6934 	pm_rele_power(dip);
6935 }
6936 
6937 void
6938 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6939 {
6940 	PMD_FUNC(pmf, "post_unconfig")
6941 
6942 	if (MDI_VHCI(dip)) {
6943 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6944 		    PM_DEVICE(dip), held))
6945 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6946 		return;
6947 	} else if (!PM_GET_PM_INFO(dip))
6948 		return;
6949 
6950 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6951 	    held))
6952 	if (!held)
6953 		return;
6954 	/*
6955 	 * We have held power in pre_unconfig, release it here.
6956 	 */
6957 	pm_rele_power(dip);
6958 }
6959 
6960 /*
6961  * Notify ppm of result of attach if there is a ppm that cares
6962  */
6963 void
6964 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
6965 {
6966 	int result;
6967 	power_req_t power_req;
6968 	dev_info_t	*dip;
6969 
6970 	if (cp->ppc_cmd != DDI_ATTACH)
6971 		return;
6972 
6973 	dip = cp->ppc_dip;
6974 
6975 	if (ret == DDI_SUCCESS) {
6976 		/*
6977 		 * Attach succeeded, so proceed to doing post-attach pm tasks
6978 		 */
6979 		if (PM_GET_PM_INFO(dip) == NULL)
6980 			(void) pm_start(dip);
6981 	} else {
6982 		/*
6983 		 * Attach may have got pm started before failing
6984 		 */
6985 		pm_stop(dip);
6986 	}
6987 
6988 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6989 		power_req.request_type = PMR_PPM_POST_ATTACH;
6990 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6991 		power_req.req.ppm_config_req.result = ret;
6992 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6993 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6994 		    DDI_CTLOPS_POWER, &power_req, &result);
6995 	}
6996 #ifdef DEBUG
6997 	else {
6998 		power_req.request_type = PMR_PPM_POST_ATTACH;
6999 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7000 		power_req.req.ppm_config_req.result = ret;
7001 		(void) pm_ctlops(NULL, cp->ppc_dip,
7002 		    DDI_CTLOPS_POWER, &power_req, &result);
7003 	}
7004 #endif
7005 }
7006 
7007 /*
7008  * Notify ppm of attach action.  Parent is already held at full power by
7009  * probe action.
7010  */
7011 void
7012 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7013 {
7014 	int result;
7015 	power_req_t power_req;
7016 
7017 	bzero(cp, sizeof (*cp));
7018 	cp->ppc_dip = dip;
7019 	cp->ppc_cmd = (int)cmd;
7020 
7021 	switch (cmd) {
7022 	case DDI_DETACH:
7023 		pm_detaching(dip);		/* suspend pm while detaching */
7024 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7025 			power_req.request_type = PMR_PPM_PRE_DETACH;
7026 			power_req.req.ppm_config_req.who = dip;
7027 			ASSERT(PPM(dip));
7028 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7029 			    &power_req, &result);
7030 			cp->ppc_ppm = PPM(dip);
7031 		} else {
7032 #ifdef DEBUG
7033 			/* pass to the default handler so we can debug things */
7034 			power_req.request_type = PMR_PPM_PRE_DETACH;
7035 			power_req.req.ppm_config_req.who = dip;
7036 			(void) pm_ctlops(NULL, dip,
7037 			    DDI_CTLOPS_POWER, &power_req, &result);
7038 #endif
7039 			cp->ppc_ppm = NULL;
7040 		}
7041 		break;
7042 
7043 	default:
7044 		break;
7045 	}
7046 }
7047 
7048 /*
7049  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7050  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7051  * make an entry to record the details, which includes certain flag settings.
7052  */
7053 static void
7054 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7055     int wasvolpmd, major_t major)
7056 {
7057 	PMD_FUNC(pmf, "record_invol_path")
7058 	major_t pm_path_to_major(char *);
7059 	size_t plen;
7060 	pm_noinvol_t *ip, *np, *pp;
7061 	pp = NULL;
7062 
7063 	plen = strlen(path) + 1;
7064 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7065 	np->ni_size = plen;
7066 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7067 	np->ni_noinvolpm = noinvolpm;
7068 	np->ni_volpmd = volpmd;
7069 	np->ni_wasvolpmd = wasvolpmd;
7070 	np->ni_flags = flags;
7071 	(void) strcpy(np->ni_path, path);
7072 	/*
7073 	 * If we haven't actually seen the node attached, it is hard to figure
7074 	 * out its major.  If we could hold the node by path, we would be much
7075 	 * happier here.
7076 	 */
7077 	if (major == (major_t)-1) {
7078 		np->ni_major = pm_path_to_major(path);
7079 	} else {
7080 		np->ni_major = major;
7081 	}
7082 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7083 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7084 		int comp = strcmp(path, ip->ni_path);
7085 		if (comp < 0) {
7086 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7087 			    pmf, path, ip->ni_path))
7088 			/* insert before current entry */
7089 			np->ni_next = ip;
7090 			if (pp) {
7091 				pp->ni_next = np;
7092 			} else {
7093 				pm_noinvol_head = np;
7094 			}
7095 			rw_exit(&pm_noinvol_rwlock);
7096 #ifdef DEBUG
7097 			if (pm_debug & PMD_NOINVOL)
7098 				pr_noinvol("record_invol_path exit0");
7099 #endif
7100 			return;
7101 		} else if (comp == 0) {
7102 			panic("%s already in pm_noinvol list", path);
7103 		}
7104 	}
7105 	/*
7106 	 * If we did not find an entry in the list that this should go before,
7107 	 * then it must go at the end
7108 	 */
7109 	if (pp) {
7110 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7111 		    pp->ni_path))
7112 		ASSERT(pp->ni_next == 0);
7113 		pp->ni_next = np;
7114 	} else {
7115 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7116 		ASSERT(!pm_noinvol_head);
7117 		pm_noinvol_head = np;
7118 	}
7119 	rw_exit(&pm_noinvol_rwlock);
7120 #ifdef DEBUG
7121 	if (pm_debug & PMD_NOINVOL)
7122 		pr_noinvol("record_invol_path exit");
7123 #endif
7124 }
7125 
7126 void
7127 pm_record_invol(dev_info_t *dip)
7128 {
7129 	char *pathbuf;
7130 	int pm_all_components_off(dev_info_t *);
7131 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7132 
7133 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7134 	(void) ddi_pathname(dip, pathbuf);
7135 
7136 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7137 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7138 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7139 
7140 	/*
7141 	 * If this child's detach will be holding up its ancestors, then we
7142 	 * allow for an exception to that if all children of this type have
7143 	 * gone down voluntarily.
7144 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7145 	 */
7146 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7147 	    dip);
7148 	kmem_free(pathbuf, MAXPATHLEN);
7149 }
7150 
7151 void
7152 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7153 {
7154 	dev_info_t *dip = cp->ppc_dip;
7155 	int result;
7156 	power_req_t power_req;
7157 
7158 	switch (cp->ppc_cmd) {
7159 	case DDI_DETACH:
7160 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7161 			power_req.request_type = PMR_PPM_POST_DETACH;
7162 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7163 			power_req.req.ppm_config_req.result = ret;
7164 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7165 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7166 			    DDI_CTLOPS_POWER, &power_req, &result);
7167 		}
7168 #ifdef DEBUG
7169 		else {
7170 			power_req.request_type = PMR_PPM_POST_DETACH;
7171 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7172 			power_req.req.ppm_config_req.result = ret;
7173 			(void) pm_ctlops(NULL, cp->ppc_dip,
7174 			    DDI_CTLOPS_POWER, &power_req, &result);
7175 		}
7176 #endif
7177 		if (ret == DDI_SUCCESS) {
7178 			/*
7179 			 * For hotplug detach we assume it is *really* gone
7180 			 */
7181 			if (cp->ppc_cmd == DDI_DETACH &&
7182 			    ((DEVI(dip)->devi_pm_flags &
7183 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7184 			    DEVI(dip)->devi_pm_noinvolpm))
7185 				pm_record_invol(dip);
7186 			DEVI(dip)->devi_pm_flags &=
7187 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7188 
7189 			/*
7190 			 * If console fb is detaching, then we don't need to
7191 			 * worry any more about it going off (pm_detaching has
7192 			 * brought up all components)
7193 			 */
7194 			if (PM_IS_CFB(dip)) {
7195 				mutex_enter(&pm_cfb_lock);
7196 				ASSERT(cfb_dip_detaching);
7197 				ASSERT(cfb_dip == NULL);
7198 				ASSERT(pm_cfb_comps_off == 0);
7199 				cfb_dip_detaching = NULL;
7200 				mutex_exit(&pm_cfb_lock);
7201 			}
7202 			pm_stop(dip);	/* make it permanent */
7203 		} else {
7204 			if (PM_IS_CFB(dip)) {
7205 				mutex_enter(&pm_cfb_lock);
7206 				ASSERT(cfb_dip_detaching);
7207 				ASSERT(cfb_dip == NULL);
7208 				ASSERT(pm_cfb_comps_off == 0);
7209 				cfb_dip = cfb_dip_detaching;
7210 				cfb_dip_detaching = NULL;
7211 				mutex_exit(&pm_cfb_lock);
7212 			}
7213 			pm_detach_failed(dip);	/* resume power management */
7214 		}
7215 		break;
7216 	case DDI_PM_SUSPEND:
7217 		break;
7218 	case DDI_SUSPEND:
7219 		break;				/* legal, but nothing to do */
7220 	default:
7221 #ifdef DEBUG
7222 		panic("pm_post_detach: unrecognized cmd %d for detach",
7223 		    cp->ppc_cmd);
7224 		/*NOTREACHED*/
7225 #else
7226 		break;
7227 #endif
7228 	}
7229 }
7230 
7231 /*
7232  * Called after vfs_mountroot has got the clock started to fix up timestamps
7233  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7234  * devices look busy but have a 0 busycnt
7235  */
7236 int
7237 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7238 {
7239 	_NOTE(ARGUNUSED(arg))
7240 
7241 	pm_info_t *info = PM_GET_PM_INFO(dip);
7242 	struct pm_component *cp;
7243 	int i;
7244 
7245 	if (!info)
7246 		return (DDI_WALK_CONTINUE);
7247 	PM_LOCK_BUSY(dip);
7248 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7249 		cp = PM_CP(dip, i);
7250 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7251 			cp->pmc_timestamp = gethrestime_sec();
7252 	}
7253 	PM_UNLOCK_BUSY(dip);
7254 	return (DDI_WALK_CONTINUE);
7255 }
7256 
7257 /*
7258  * Called at attach time to see if the device being attached has a record in
7259  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7260  * parents and set a flag in the dip
7261  */
7262 void
7263 pm_noinvol_specd(dev_info_t *dip)
7264 {
7265 	PMD_FUNC(pmf, "noinvol_specd")
7266 	char *pathbuf;
7267 	pm_noinvol_t *ip, *pp = NULL;
7268 	int wasvolpmd;
7269 	int found = 0;
7270 
7271 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7272 		return;
7273 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7274 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7275 	(void) ddi_pathname(dip, pathbuf);
7276 
7277 	PM_LOCK_DIP(dip);
7278 	DEVI(dip)->devi_pm_volpmd = 0;
7279 	DEVI(dip)->devi_pm_noinvolpm = 0;
7280 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7281 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7282 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7283 		    pmf, pathbuf, ip->ni_path))
7284 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7285 			found++;
7286 			break;
7287 		}
7288 	}
7289 	rw_exit(&pm_noinvol_rwlock);
7290 	if (!found) {
7291 		PM_UNLOCK_DIP(dip);
7292 		kmem_free(pathbuf, MAXPATHLEN);
7293 		return;
7294 	}
7295 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7296 	pp = NULL;
7297 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7298 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7299 		    pmf, pathbuf, ip->ni_path))
7300 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7301 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7302 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7303 			/*
7304 			 * Handle special case of console fb
7305 			 */
7306 			if (PM_IS_CFB(dip)) {
7307 				mutex_enter(&pm_cfb_lock);
7308 				cfb_dip = dip;
7309 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7310 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7311 				mutex_exit(&pm_cfb_lock);
7312 			}
7313 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7314 			ASSERT((DEVI(dip)->devi_pm_flags &
7315 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7316 			    DEVI(dip)->devi_pm_noinvolpm);
7317 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7318 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7319 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7320 			    ip->ni_noinvolpm, ip->ni_volpmd,
7321 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7322 			/*
7323 			 * free the entry in hopes the list will now be empty
7324 			 * and we won't have to search it any more until the
7325 			 * device detaches
7326 			 */
7327 			if (pp) {
7328 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7329 				    pmf, ip->ni_path, pp->ni_path))
7330 				pp->ni_next = ip->ni_next;
7331 			} else {
7332 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7333 				    pmf, ip->ni_path))
7334 				ASSERT(pm_noinvol_head == ip);
7335 				pm_noinvol_head = ip->ni_next;
7336 			}
7337 			PM_UNLOCK_DIP(dip);
7338 			wasvolpmd = ip->ni_wasvolpmd;
7339 			rw_exit(&pm_noinvol_rwlock);
7340 			kmem_free(ip->ni_path, ip->ni_size);
7341 			kmem_free(ip, sizeof (*ip));
7342 			/*
7343 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7344 			 * (and volpmd if appropriate)
7345 			 */
7346 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7347 			    wasvolpmd, pathbuf, dip);
7348 #ifdef DEBUG
7349 			if (pm_debug & PMD_NOINVOL)
7350 				pr_noinvol("noinvol_specd exit");
7351 #endif
7352 			kmem_free(pathbuf, MAXPATHLEN);
7353 			return;
7354 		}
7355 	}
7356 	kmem_free(pathbuf, MAXPATHLEN);
7357 	rw_exit(&pm_noinvol_rwlock);
7358 	PM_UNLOCK_DIP(dip);
7359 }
7360 
7361 int
7362 pm_all_components_off(dev_info_t *dip)
7363 {
7364 	int i;
7365 	pm_component_t *cp;
7366 
7367 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7368 		cp = PM_CP(dip, i);
7369 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7370 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7371 			return (0);
7372 	}
7373 	return (1);	/* all off */
7374 }
7375 
7376 /*
7377  * Make sure that all "no involuntary power cycles" devices are attached.
7378  * Called before doing a cpr suspend to make sure the driver has a say about
7379  * the power cycle
7380  */
7381 int
7382 pm_reattach_noinvol(void)
7383 {
7384 	PMD_FUNC(pmf, "reattach_noinvol")
7385 	pm_noinvol_t *ip;
7386 	char *path;
7387 	dev_info_t *dip;
7388 
7389 	/*
7390 	 * Prevent the modunload thread from unloading any modules until we
7391 	 * have completely stopped all kernel threads.
7392 	 */
7393 	modunload_disable();
7394 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7395 		/*
7396 		 * Forget we'v ever seen any entry
7397 		 */
7398 		ip->ni_persistent = 0;
7399 	}
7400 restart:
7401 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7402 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7403 #ifdef PMDDEBUG
7404 		major_t maj;
7405 		maj = ip->ni_major;
7406 #endif
7407 		path = ip->ni_path;
7408 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7409 			if (ip->ni_persistent) {
7410 				/*
7411 				 * If we weren't able to make this entry
7412 				 * go away, then we give up, as
7413 				 * holding/attaching the driver ought to have
7414 				 * resulted in this entry being deleted
7415 				 */
7416 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7417 				    "(%s|%d)\n", pmf, ip->ni_path,
7418 				    ddi_major_to_name(maj), (int)maj))
7419 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7420 				    ip->ni_path);
7421 				modunload_enable();
7422 				rw_exit(&pm_noinvol_rwlock);
7423 				return (0);
7424 			}
7425 			ip->ni_persistent++;
7426 			rw_exit(&pm_noinvol_rwlock);
7427 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7428 			dip = e_ddi_hold_devi_by_path(path, 0);
7429 			if (dip == NULL) {
7430 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7431 				    pmf, path, (int)maj))
7432 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7433 				    "driver", path);
7434 				modunload_enable();
7435 				return (0);
7436 			} else {
7437 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7438 				/*
7439 				 * Since the modunload thread is stopped, we
7440 				 * don't have to keep the driver held, which
7441 				 * saves a ton of bookkeeping
7442 				 */
7443 				ddi_release_devi(dip);
7444 				goto restart;
7445 			}
7446 		} else {
7447 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7448 			    pmf, ip->ni_path))
7449 			continue;
7450 		}
7451 	}
7452 	rw_exit(&pm_noinvol_rwlock);
7453 	return (1);
7454 }
7455 
/*
 * Re-enable module unloading; the counterpart of the modunload_disable()
 * call made by pm_reattach_noinvol().
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7461 
7462 /*
7463  * Display pm support code
7464  */
7465 
7466 
7467 /*
7468  * console frame-buffer power-mgmt gets enabled when debugging
7469  * services are not present or console fbpm override is set
7470  */
7471 void
7472 pm_cfb_setup(const char *stdout_path)
7473 {
7474 	PMD_FUNC(pmf, "cfb_setup")
7475 	extern int obpdebug;
7476 	char *devname;
7477 	dev_info_t *dip;
7478 	int devname_len;
7479 	extern dev_info_t *fbdip;
7480 
7481 	/*
7482 	 * By virtue of this function being called (from consconfig),
7483 	 * we know stdout is a framebuffer.
7484 	 */
7485 	stdout_is_framebuffer = 1;
7486 
7487 	if (obpdebug || (boothowto & RB_DEBUG)) {
7488 		if (pm_cfb_override == 0) {
7489 			/*
7490 			 * Console is frame buffer, but we want to suppress
7491 			 * pm on it because of debugging setup
7492 			 */
7493 			pm_cfb_enabled = 0;
7494 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7495 			    "console power management.");
7496 			/*
7497 			 * however, we still need to know which is the console
7498 			 * fb in order to suppress pm on it
7499 			 */
7500 		} else {
7501 			cmn_err(CE_WARN, "Kernel debugger present: see "
7502 			    "kmdb(1M) for interaction with power management.");
7503 		}
7504 	}
7505 #ifdef DEBUG
7506 	/*
7507 	 * IF console is fb and is power managed, don't do prom_printfs from
7508 	 * pm debug macro
7509 	 */
7510 	if (pm_cfb_enabled) {
7511 		if (pm_debug)
7512 			prom_printf("pm debug output will be to log only\n");
7513 		pm_divertdebug++;
7514 	}
7515 #endif
7516 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7517 	devname_len = strlen(devname) + 1;
7518 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7519 	/* if the driver is attached */
7520 	if ((dip = fbdip) != NULL) {
7521 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7522 		    PM_DEVICE(dip)))
7523 		/*
7524 		 * We set up here as if the driver were power manageable in case
7525 		 * we get a later attach of a pm'able driver (which would result
7526 		 * in a panic later)
7527 		 */
7528 		cfb_dip = dip;
7529 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7530 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7531 		    PM_DEVICE(dip)))
7532 #ifdef DEBUG
7533 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7534 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7535 			    pmf, PM_DEVICE(dip)))
7536 		}
7537 #endif
7538 	} else {
7539 		char *ep;
7540 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7541 		pm_record_invol_path(devname,
7542 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7543 		    (major_t)-1);
7544 		for (ep = strrchr(devname, '/'); ep != devname;
7545 		    ep = strrchr(devname, '/')) {
7546 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7547 			*ep = '\0';
7548 			dip = pm_name_to_dip(devname, 0);
7549 			if (dip != NULL) {
7550 				/*
7551 				 * Walk up the tree incrementing
7552 				 * devi_pm_noinvolpm
7553 				 */
7554 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7555 				    0, 0, devname, dip);
7556 				break;
7557 			} else {
7558 				pm_record_invol_path(devname,
7559 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7560 			}
7561 		}
7562 	}
7563 	kmem_free(devname, devname_len);
7564 }
7565 
7566 void
7567 pm_cfb_rele(void)
7568 {
7569 	mutex_enter(&pm_cfb_lock);
7570 	/*
7571 	 * this call isn't using the console any  more, it is ok to take it
7572 	 * down if the count goes to 0
7573 	 */
7574 	cfb_inuse--;
7575 	mutex_exit(&pm_cfb_lock);
7576 }
7577 
7578 /*
7579  * software interrupt handler for fbpm; this function exists because we can't
7580  * bring up the frame buffer power from above lock level.  So if we need to,
7581  * we instead schedule a softint that runs this routine and takes us into
7582  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7583  */
7584 static uint_t
7585 pm_cfb_softint(caddr_t int_handler_arg)
7586 {
7587 	_NOTE(ARGUNUSED(int_handler_arg))
7588 	int rval = DDI_INTR_UNCLAIMED;
7589 
7590 	mutex_enter(&pm_cfb_lock);
7591 	if (pm_soft_pending) {
7592 		mutex_exit(&pm_cfb_lock);
7593 		debug_enter((char *)NULL);
7594 		/* acquired in debug_enter before calling pm_cfb_trigger */
7595 		pm_cfb_rele();
7596 		mutex_enter(&pm_cfb_lock);
7597 		pm_soft_pending = 0;
7598 		mutex_exit(&pm_cfb_lock);
7599 		rval = DDI_INTR_CLAIMED;
7600 	} else
7601 		mutex_exit(&pm_cfb_lock);
7602 
7603 	return (rval);
7604 }
7605 
7606 void
7607 pm_cfb_setup_intr(void)
7608 {
7609 	PMD_FUNC(pmf, "cfb_setup_intr")
7610 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7611 	void pm_cfb_check_and_powerup(void);
7612 
7613 	if (!stdout_is_framebuffer) {
7614 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7615 		return;
7616 	}
7617 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7618 #ifdef DEBUG
7619 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7620 #endif
7621 	/*
7622 	 * setup software interrupt handler
7623 	 */
7624 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7625 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7626 		panic("pm: unable to register soft intr.");
7627 
7628 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7629 }
7630 
7631 /*
7632  * Checks to see if it is safe to write to the console wrt power management
7633  * (i.e. if the console is a framebuffer, then it must be at full power)
7634  * returns 1 when power is off (power-up is needed)
7635  * returns 0 when power is on (power-up not needed)
7636  */
7637 int
7638 pm_cfb_check_and_hold(void)
7639 {
7640 	/*
7641 	 * cfb_dip is set iff console is a power manageable frame buffer
7642 	 * device
7643 	 */
7644 	extern int modrootloaded;
7645 
7646 	mutex_enter(&pm_cfb_lock);
7647 	cfb_inuse++;
7648 	ASSERT(cfb_inuse);	/* wrap? */
7649 	if (modrootloaded && cfb_dip) {
7650 		/*
7651 		 * don't power down the frame buffer, the prom is using it
7652 		 */
7653 		if (pm_cfb_comps_off) {
7654 			mutex_exit(&pm_cfb_lock);
7655 			return (1);
7656 		}
7657 	}
7658 	mutex_exit(&pm_cfb_lock);
7659 	return (0);
7660 }
7661 
7662 /*
7663  * turn on cfb power (which is known to be off).
7664  * Must be called below lock level!
7665  */
7666 void
7667 pm_cfb_powerup(void)
7668 {
7669 	pm_info_t *info;
7670 	int norm;
7671 	int ccount, ci;
7672 	int unused;
7673 #ifdef DEBUG
7674 	/*
7675 	 * Can't reenter prom_prekern, so suppress pm debug messages
7676 	 * (still go to circular buffer).
7677 	 */
7678 	mutex_enter(&pm_debug_lock);
7679 	pm_divertdebug++;
7680 	mutex_exit(&pm_debug_lock);
7681 #endif
7682 	info = PM_GET_PM_INFO(cfb_dip);
7683 	ASSERT(info);
7684 
7685 	ccount = PM_NUMCMPTS(cfb_dip);
7686 	for (ci = 0; ci < ccount; ci++) {
7687 		norm = pm_get_normal_power(cfb_dip, ci);
7688 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7689 		    PM_CANBLOCK_BYPASS, 0, &unused);
7690 	}
7691 #ifdef DEBUG
7692 	mutex_enter(&pm_debug_lock);
7693 	pm_divertdebug--;
7694 	mutex_exit(&pm_debug_lock);
7695 #endif
7696 }
7697 
/*
 * Check if the console framebuffer is powered up and power it up if not.
 *
 * Note: the pm_cfb_check_and_hold() call made here puts a hold on the power
 * state; the caller must release it with pm_cfb_rele() once the console fb
 * operation is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	int need_powerup = pm_cfb_check_and_hold();

	if (need_powerup) {
		pm_cfb_powerup();
	}
}
7710 
7711 /*
7712  * Trigger a low level interrupt to power up console frame buffer.
7713  */
7714 void
7715 pm_cfb_trigger(void)
7716 {
7717 	if (cfb_dip == NULL)
7718 		return;
7719 
7720 	mutex_enter(&pm_cfb_lock);
7721 	/*
7722 	 * If machine appears to be hung, pulling the keyboard connector of
7723 	 * the console will cause a high level interrupt and go to debug_enter.
7724 	 * But, if the fb is powered down, this routine will be called to bring
7725 	 * it up (by generating a softint to do the work).  If soft interrupts
7726 	 * are not running, and the keyboard connector is pulled again, the
7727 	 * following code detects this condition and calls panic which allows
7728 	 * the fb to be brought up from high level.
7729 	 *
7730 	 * If two nearly simultaneous calls to debug_enter occur (both from
7731 	 * high level) the code described above will cause a panic.
7732 	 */
7733 	if (lbolt <= pm_soft_pending) {
7734 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7735 		panic(panicstr);	/* does a power up at any intr level */
7736 		/* NOTREACHED */
7737 	}
7738 	pm_soft_pending = lbolt;
7739 	mutex_exit(&pm_cfb_lock);
7740 	ddi_trigger_softintr(pm_soft_id);
7741 }
7742 
7743 major_t
7744 pm_path_to_major(char *path)
7745 {
7746 	PMD_FUNC(pmf, "path_to_major")
7747 	char *np, *ap, *bp;
7748 	major_t ret;
7749 	size_t len;
7750 	static major_t i_path_to_major(char *, char *);
7751 
7752 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7753 
7754 	np = strrchr(path, '/');
7755 	if (np != NULL)
7756 		np++;
7757 	else
7758 		np = path;
7759 	len = strlen(np) + 1;
7760 	bp = kmem_alloc(len, KM_SLEEP);
7761 	(void) strcpy(bp, np);
7762 	if ((ap = strchr(bp, '@')) != NULL) {
7763 		*ap = '\0';
7764 	}
7765 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7766 	ret = i_path_to_major(path, np);
7767 	kmem_free(bp, len);
7768 	return (ret);
7769 }
7770 
#ifdef DEBUG

/*
 * State of the pm debug message log: a buffer of PMLOGPGS pages that is
 * filled with NUL-terminated messages and reused from the start when a
 * message does not fit in the space that is left.
 */
char *pm_msgp;			/* where the next message will be written */
char *pm_bufend;		/* last byte of the buffer (kept as NUL) */
char *pm_msgbuf = NULL;		/* start of buffer; allocated on first use */
int   pm_logpages = 2;		/* size of the buffer in pages */

#define	PMLOGPGS	pm_logpages

/*
 * Format a message into the pm debug log and, unless output is diverted
 * (pm_divertdebug nonzero), also print it with prom_printf().
 *
 * The message is formatted twice: once to learn its length, once into the
 * log.  If it does not fit in the space remaining, the tail of the buffer is
 * zeroed and the message is written at the start of the buffer instead.  A
 * message longer than the whole buffer is truncated to fit.
 *
 * NOTE(review): the first call allocates with KM_SLEEP while pm_debug_lock
 * is held, and pm_cfb_setup_intr() initializes that lock as MUTEX_SPIN --
 * confirm that this cannot block in practice.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;
	size_t capacity;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* first pass: how many bytes, including the terminating NUL? */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	/* never write past the end, however long the message is */
	capacity = (size_t)(pm_bufend - pm_msgbuf);
	if (size > capacity)
		size = capacity;

	if (size > (size_t)(pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		pm_msgp = pm_msgbuf;
	}

	/* second pass: format in place, then echo what was just written */
	va_start(adx, fmt);
	(void) vsnprintf(pm_msgp, size, fmt, adx);
	va_end(adx);
	if (!pm_divertdebug)
		prom_printf("%s", pm_msgp);
	pm_msgp += size;
	mutex_exit(&pm_debug_lock);
}
#endif	/* DEBUG */
7813 
7814 /*
7815  * We want to save the state of any directly pm'd devices over the suspend/
7816  * resume process so that we can put them back the way the controlling
7817  * process left them.
7818  */
7819 void
7820 pm_save_direct_levels(void)
7821 {
7822 	pm_processes_stopped = 1;
7823 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7824 }
7825 
7826 static int
7827 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7828 {
7829 	_NOTE(ARGUNUSED(arg))
7830 	int i;
7831 	int *ip;
7832 	pm_info_t *info = PM_GET_PM_INFO(dip);
7833 
7834 	if (!info)
7835 		return (DDI_WALK_CONTINUE);
7836 
7837 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7838 		if (PM_NUMCMPTS(dip) > 2) {
7839 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7840 			    sizeof (int), KM_SLEEP);
7841 			ip = info->pmi_lp;
7842 		} else {
7843 			ip = info->pmi_levels;
7844 		}
7845 		/* autopm and processes are stopped, ok not to lock power */
7846 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7847 			*ip++ = PM_CURPOWER(dip, i);
7848 		/*
7849 		 * There is a small window between stopping the
7850 		 * processes and setting pm_processes_stopped where
7851 		 * a driver could get hung up in a pm_raise_power()
7852 		 * call.  Free any such driver now.
7853 		 */
7854 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7855 	}
7856 
7857 	return (DDI_WALK_CONTINUE);
7858 }
7859 
7860 void
7861 pm_restore_direct_levels(void)
7862 {
7863 	/*
7864 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7865 	 * threads failed) then we don't want to try to restore them
7866 	 */
7867 	if (!pm_processes_stopped)
7868 		return;
7869 
7870 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7871 	pm_processes_stopped = 0;
7872 }
7873 
7874 static int
7875 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7876 {
7877 	_NOTE(ARGUNUSED(arg))
7878 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7879 	int i, nc, result;
7880 	int *ip;
7881 
7882 	pm_info_t *info = PM_GET_PM_INFO(dip);
7883 	if (!info)
7884 		return (DDI_WALK_CONTINUE);
7885 
7886 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7887 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7888 			ip = &info->pmi_lp[nc - 1];
7889 		} else {
7890 			ip = &info->pmi_levels[nc - 1];
7891 		}
7892 		/*
7893 		 * Because fb drivers fail attempts to turn off the
7894 		 * fb when the monitor is on, but treat a request to
7895 		 * turn on the monitor as a request to turn on the
7896 		 * fb too, we process components in descending order
7897 		 * Because autopm is disabled and processes aren't
7898 		 * running, it is ok to examine current power outside
7899 		 * of the power lock
7900 		 */
7901 		for (i = nc - 1; i >= 0; i--, ip--) {
7902 			if (PM_CURPOWER(dip, i) == *ip)
7903 				continue;
7904 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7905 			    PM_CANBLOCK_BYPASS, 0, &result) !=
7906 				DDI_SUCCESS) {
7907 				cmn_err(CE_WARN, "cpr: unable "
7908 				    "to restore power level of "
7909 				    "component %d of directly "
7910 				    "power manged device %s@%s"
7911 				    " to %d",
7912 				    i, PM_NAME(dip),
7913 				    PM_ADDR(dip), *ip);
7914 				PMD(PMD_FAIL, ("%s: failed to restore "
7915 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7916 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7917 				    PM_CURPOWER(dip, i), *ip, result))
7918 			}
7919 		}
7920 		if (nc > 2) {
7921 			kmem_free(info->pmi_lp, nc * sizeof (int));
7922 			info->pmi_lp = NULL;
7923 		}
7924 	}
7925 	return (DDI_WALK_CONTINUE);
7926 }
7927 
7928 /*
7929  * Stolen from the bootdev module
7930  * attempt to convert a path to a major number
7931  */
7932 static major_t
7933 i_path_to_major(char *path, char *leaf_name)
7934 {
7935 	extern major_t path_to_major(char *pathname);
7936 	major_t maj;
7937 
7938 	if ((maj = path_to_major(path)) == (major_t)-1) {
7939 		maj = ddi_name_to_major(leaf_name);
7940 	}
7941 
7942 	return (maj);
7943 }
7944 
7945 /*
7946  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
7947  * An entry in the list means that the device is detached, so we need to
7948  * adjust its ancestors as if they had just seen this attach, and any detached
7949  * ancestors need to have their list entries adjusted.
7950  */
7951 void
7952 pm_driver_removed(major_t major)
7953 {
7954 	static void i_pm_driver_removed(major_t major);
7955 
7956 	/*
7957 	 * Serialize removal of drivers. This is to keep ancestors of
7958 	 * a node that is being deleted from getting deleted and added back
7959 	 * with different counters.
7960 	 */
7961 	mutex_enter(&pm_remdrv_lock);
7962 	i_pm_driver_removed(major);
7963 	mutex_exit(&pm_remdrv_lock);
7964 }
7965 
7966 /*
7967  * This routine is called recursively by pm_noinvol_process_ancestors()
7968  */
7969 static void
7970 i_pm_driver_removed(major_t major)
7971 {
7972 	PMD_FUNC(pmf, "driver_removed")
7973 	static void adjust_ancestors(char *, int);
7974 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
7975 	static void pm_noinvol_process_ancestors(char *);
7976 	pm_noinvol_t *ip, *pp = NULL;
7977 	int wasvolpmd;
7978 	ASSERT(major != (major_t)-1);
7979 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
7980 again:
7981 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7982 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7983 		if (major != ip->ni_major)
7984 			continue;
7985 		/*
7986 		 * If it is an ancestor of no-invol node, which is
7987 		 * not removed, skip it. This is to cover the case of
7988 		 * ancestor removed without removing its descendants.
7989 		 */
7990 		if (pm_is_noinvol_ancestor(ip)) {
7991 			ip->ni_flags |= PMC_DRIVER_REMOVED;
7992 			continue;
7993 		}
7994 		wasvolpmd = ip->ni_wasvolpmd;
7995 		/*
7996 		 * remove the entry from the list
7997 		 */
7998 		if (pp) {
7999 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8000 			    pmf, ip->ni_path, pp->ni_path))
8001 			pp->ni_next = ip->ni_next;
8002 		} else {
8003 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8004 			    ip->ni_path))
8005 			ASSERT(pm_noinvol_head == ip);
8006 			pm_noinvol_head = ip->ni_next;
8007 		}
8008 		rw_exit(&pm_noinvol_rwlock);
8009 		adjust_ancestors(ip->ni_path, wasvolpmd);
8010 		/*
8011 		 * Had an ancestor been removed before this node, it would have
8012 		 * been skipped. Adjust the no-invol counters for such skipped
8013 		 * ancestors.
8014 		 */
8015 		pm_noinvol_process_ancestors(ip->ni_path);
8016 		kmem_free(ip->ni_path, ip->ni_size);
8017 		kmem_free(ip, sizeof (*ip));
8018 		goto again;
8019 	}
8020 	rw_exit(&pm_noinvol_rwlock);
8021 }
8022 
8023 /*
8024  * returns 1, if *aip is a ancestor of a no-invol node
8025  *	   0, otherwise
8026  */
8027 static int
8028 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8029 {
8030 	pm_noinvol_t *ip;
8031 
8032 	ASSERT(strlen(aip->ni_path) != 0);
8033 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8034 		if (ip == aip)
8035 			continue;
8036 		/*
8037 		 * To be an ancestor, the path must be an initial substring of
8038 		 * the descendent, and end just before a '/' in the
8039 		 * descendent's path.
8040 		 */
8041 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8042 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8043 			return (1);
8044 	}
8045 	return (0);
8046 }
8047 
/* Major number of the driver bound to dip, looked up by binding name. */
#define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
/*
 * Adjust the no-invol bookkeeping of the ancestors of the node named by
 * path, after that node's list entry has been removed.
 *
 * path		device path of the node whose ancestors are to be adjusted
 * wasvolpmd	passed through to pm_noinvol_update(); also decides whether a
 *		list-only ancestor's ni_volpmd count is decremented
 *
 * The parent's path is derived in a private copy (pathbuf), so *path itself
 * is not modified by this function.  If the parent has a dev_info node, the
 * counts up the tree are adjusted through pm_noinvol_update().  Otherwise the
 * pm_noinvol list is searched for an entry with exactly the parent's path,
 * that entry's counters are adjusted (and the entry freed when it is no
 * longer needed), and the function recurses on the parent's path.
 *
 * Takes and releases pm_noinvol_rwlock internally, so the caller must not
 * hold it.
 */
static void
adjust_ancestors(char *path, int wasvolpmd)
{
	PMD_FUNC(pmf, "adjust_ancestors")
	char *cp;
	pm_noinvol_t *lp;
	pm_noinvol_t *pp = NULL;
	major_t locked = (major_t)UINT_MAX;
	dev_info_t *dip;
	char	*pathbuf;
	size_t pathbuflen = strlen(path) + 1;

	/*
	 * First we look up the ancestor's dip.  If we find it, then we
	 * adjust counts up the tree
	 */
	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
	(void) strcpy(pathbuf, path);
	cp = strrchr(pathbuf, '/');
	if (cp == NULL)	{
		/* if no ancestors, then nothing to do */
		kmem_free(pathbuf, pathbuflen);
		return;
	}
	/* truncate the copy at the last '/': pathbuf now names the parent */
	*cp = '\0';
	dip = pm_name_to_dip(pathbuf, 1);
	if (dip != NULL) {
		locked = PM_MAJOR(dip);

		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
		    path, dip);

		/*
		 * NOTE(review): the node is released only when its major
		 * number could be resolved; presumably pm_name_to_dip(..., 1)
		 * returned it held -- confirm against pm_name_to_dip().
		 */
		if (locked != (major_t)UINT_MAX)
			ddi_release_devi(dip);
	} else {
		char *apath;
		size_t len = strlen(pathbuf) + 1;
		int  lock_held = 1;

		/*
		 * Now check for ancestors that exist only in the list
		 */
		apath = kmem_alloc(len, KM_SLEEP);
		(void) strcpy(apath, pathbuf);
		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
			/*
			 * This can only happen once.  Since we have to drop
			 * the lock, we need to extract the relevant info.
			 */
			if (strcmp(pathbuf, lp->ni_path) == 0) {
				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
				    lp->ni_path, lp->ni_noinvolpm,
				    lp->ni_noinvolpm - 1))
				lp->ni_noinvolpm--;
				if (wasvolpmd && lp->ni_volpmd) {
					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
					    "%d\n", pmf, lp->ni_path,
					    lp->ni_volpmd, lp->ni_volpmd - 1))
					lp->ni_volpmd--;
				}
				/*
				 * remove the entry from the list, if there
				 * are no more no-invol descendants and node
				 * itself is not a no-invol node.
				 */
				if (!(lp->ni_noinvolpm ||
				    (lp->ni_flags & PMC_NO_INVOL))) {
					ASSERT(lp->ni_volpmd == 0);
					if (pp) {
						PMD(PMD_NOINVOL, ("%s: freeing "
						    "%s, prev is %s\n", pmf,
						    lp->ni_path, pp->ni_path))
						pp->ni_next = lp->ni_next;
					} else {
						PMD(PMD_NOINVOL, ("%s: free %s "
						    "head\n", pmf, lp->ni_path))
						ASSERT(pm_noinvol_head == lp);
						pm_noinvol_head = lp->ni_next;
					}
					/*
					 * lp is unlinked; drop the lock before
					 * recursing, which takes it again.
					 * NOTE(review): together with the call
					 * after the loop, the ancestors are
					 * adjusted twice on this path --
					 * confirm that this is intended.
					 */
					lock_held = 0;
					rw_exit(&pm_noinvol_rwlock);
					adjust_ancestors(apath, wasvolpmd);
					/* restore apath */
					(void) strcpy(apath, pathbuf);
					kmem_free(lp->ni_path, lp->ni_size);
					kmem_free(lp, sizeof (*lp));
				}
				break;
			}
		}
		if (lock_held)
			rw_exit(&pm_noinvol_rwlock);
		/* continue up the tree from the parent's path */
		adjust_ancestors(apath, wasvolpmd);
		kmem_free(apath, len);
	}
	kmem_free(pathbuf, pathbuflen);
}
8152 
8153 /*
8154  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8155  * which were skipped even though their drivers were removed.
8156  */
8157 static void
8158 pm_noinvol_process_ancestors(char *path)
8159 {
8160 	pm_noinvol_t *lp;
8161 
8162 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8163 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8164 		if (strstr(path, lp->ni_path) &&
8165 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8166 			rw_exit(&pm_noinvol_rwlock);
8167 			i_pm_driver_removed(lp->ni_major);
8168 			return;
8169 		}
8170 	}
8171 	rw_exit(&pm_noinvol_rwlock);
8172 }
8173 
8174 /*
8175  * Returns true if (detached) device needs to be kept up because it exported the
8176  * "no-involuntary-power-cycles" property or we're pretending it did (console
8177  * fb case) or it is an ancestor of such a device and has used up the "one
8178  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8179  * upon detach.  In any event, we need an exact hit on the path or we return
8180  * false.
8181  */
8182 int
8183 pm_noinvol_detached(char *path)
8184 {
8185 	PMD_FUNC(pmf, "noinvol_detached")
8186 	pm_noinvol_t *ip;
8187 	int ret = 0;
8188 
8189 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8190 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8191 		if (strcmp(path, ip->ni_path) == 0) {
8192 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8193 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8194 				    "%s\n", pmf, path))
8195 				ret = 1;
8196 				break;
8197 			}
8198 #ifdef	DEBUG
8199 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8200 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8201 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8202 				    path))
8203 #endif
8204 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8205 			break;
8206 		}
8207 	}
8208 	rw_exit(&pm_noinvol_rwlock);
8209 	return (ret);
8210 }
8211 
8212 int
8213 pm_is_cfb(dev_info_t *dip)
8214 {
8215 	return (dip == cfb_dip);
8216 }
8217 
#ifdef	DEBUG
/*
 * DEBUG-only check: true when every component of the console frame buffer is
 * at "normal" power, i.e. fully on.  Also true when the console is not a
 * frame buffer.  Implemented as "pm_cfb_comps_off is zero".
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8230 
8231 /*
8232  * Preventing scan from powering down the node by incrementing the
8233  * kidsupcnt.
8234  */
8235 void
8236 pm_hold_power(dev_info_t *dip)
8237 {
8238 	e_pm_hold_rele_power(dip, 1);
8239 }
8240 
8241 /*
8242  * Releasing the hold by decrementing the kidsupcnt allowing scan
8243  * to power down the node if all conditions are met.
8244  */
8245 void
8246 pm_rele_power(dev_info_t *dip)
8247 {
8248 	e_pm_hold_rele_power(dip, -1);
8249 }
8250 
8251 /*
8252  * A wrapper of pm_all_to_normal() to power up a dip
8253  * to its normal level
8254  */
8255 int
8256 pm_powerup(dev_info_t *dip)
8257 {
8258 	PMD_FUNC(pmf, "pm_powerup")
8259 
8260 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8261 	ASSERT(!(servicing_interrupt()));
8262 
8263 	/*
8264 	 * in case this node is not already participating pm
8265 	 */
8266 	if (!PM_GET_PM_INFO(dip)) {
8267 		if (!DEVI_IS_ATTACHING(dip))
8268 			return (DDI_SUCCESS);
8269 		if (pm_start(dip) != DDI_SUCCESS)
8270 			return (DDI_FAILURE);
8271 		if (!PM_GET_PM_INFO(dip))
8272 			return (DDI_SUCCESS);
8273 	}
8274 
8275 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8276 }
8277 
8278 int
8279 pm_rescan_walk(dev_info_t *dip, void *arg)
8280 {
8281 	_NOTE(ARGUNUSED(arg))
8282 
8283 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8284 		return (DDI_WALK_CONTINUE);
8285 
8286 	/*
8287 	 * Currently pm_cpr_callb/resume code is the only caller
8288 	 * and it needs to make sure that stopped scan get
8289 	 * reactivated. Otherwise, rescan walk needn't reactive
8290 	 * stopped scan.
8291 	 */
8292 	pm_scan_init(dip);
8293 
8294 	(void) pm_rescan(dip);
8295 	return (DDI_WALK_CONTINUE);
8296 }
8297 
8298 static dev_info_t *
8299 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8300 {
8301 	dev_info_t *wdip, *pdip;
8302 
8303 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8304 		pdip = ddi_get_parent(wdip);
8305 		if (pdip == dip)
8306 			return (wdip);
8307 	}
8308 	return (NULL);
8309 }
8310 
/*
 * Framework implementation of the bus_power operation for nexus node dip.
 *
 * op selects how arg is interpreted:
 *
 * BUS_POWER_CHILD_PWRCHG  (arg: pm_bp_child_pwrchg_t)
 *	Find the child of dip that leads to the target bpc_dip.  If that child
 *	is the target, hand the request to pm_busop_set_power().  Otherwise
 *	either pass the request to the child's own bus_power function (when
 *	it wants notification), or hold the child's power, bring it to normal
 *	power if it has pm info, recurse one level down and release the hold.
 *
 * BUS_POWER_NEXUS_PWRUP  (arg: pm_bp_nexus_pwrup_t)
 *	Validate the component and level, then raise dip's own component
 *	through pm_busop_set_power() with direction PM_LEVEL_UPONLY.
 *
 * BUS_POWER_NOINVOL  (arg: pm_bp_noinvol_t)
 *	Walk down towards bpni_dip and, on the way back up after a successful
 *	descent, update each node that has pm info with
 *	pm_noinvol_update_node().
 *
 * Any other op fails with DDI_FAILURE.  result is passed through to the
 * functions called; the NEXUS_PWRUP validation failure path stores
 * DDI_FAILURE through it as an int.
 */
int
pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	PMD_FUNC(pmf, "bp_bus_power")
	dev_info_t	*cdip;
	pm_info_t	*cinfo;
	pm_bp_child_pwrchg_t	*bpc;
	pm_sp_misc_t		*pspm;
	pm_bp_nexus_pwrup_t *bpn;
	pm_bp_child_pwrchg_t new_bpc;
	pm_bp_noinvol_t *bpi;
	dev_info_t *tdip;
	char *pathbuf;
	int		ret = DDI_SUCCESS;
	int		errno = 0;
	pm_component_t *cp;

	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))
	switch (op) {
	case BUS_POWER_CHILD_PWRCHG:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		pspm = (pm_sp_misc_t *)bpc->bpc_private;
		tdip = bpc->bpc_dip;
		/*
		 * NOTE(review): unlike the BUS_POWER_NOINVOL case below, cdip
		 * is not checked for NULL here; presumably the target is
		 * always a proper descendant of dip -- confirm with callers.
		 */
		cdip = pm_get_next_descendent(dip, tdip);
		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			/*
			 * If the node is an involved parent, it needs to
			 * power up the node as it is needed.  There is nothing
			 * else the framework can do here.
			 */
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_SET, ("%s: call bus_power for "
				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
				    impl_arg, op, arg, result));
			}
			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
			    pspm->pspm_direction == PM_LEVEL_EXACT);
			/*
			 * we presume that the parent needs to be up in
			 * order for the child to change state (either
			 * because it must already be on if the child is on
			 * (and the pm_all_to_normal_nexus() will be a nop)
			 * or because it will need to be on for the child
			 * to come on; so we make the call regardless
			 */
			pm_hold_power(cdip);
			if (cinfo) {
				pm_canblock_t canblock = pspm->pspm_canblock;
				ret = pm_all_to_normal_nexus(cdip, canblock);
				if (ret != DDI_SUCCESS) {
					pm_rele_power(cdip);
					return (ret);
				}
			}
			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(cdip)))
			/* recurse with cdip as the nexus, one level down */
			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
			    result);
			pm_rele_power(cdip);
		} else {
			/* cdip is the target itself */
			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
			    result);
		}
		return (ret);

	case BUS_POWER_NEXUS_PWRUP:
		bpn = (pm_bp_nexus_pwrup_t *)arg;
		pspm = (pm_sp_misc_t *)bpn->bpn_private;

		/*
		 * Reject the request if dip's pm info, the component or the
		 * level is not valid.  The error is reported through the
		 * caller-supplied pspm_errnop, which must therefore be valid.
		 */
		if (!e_pm_valid_info(dip, NULL) ||
		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
			    pmf, PM_DEVICE(dip)))
			*pspm->pspm_errnop = EIO;
			*(int *)result = DDI_FAILURE;
			return (DDI_FAILURE);
		}

		ASSERT(bpn->bpn_dip == dip);
		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
		    PM_DEVICE(dip)))
		/* recast the request as a child power change on dip itself */
		new_bpc.bpc_dip = dip;
		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
		new_bpc.bpc_comp = bpn->bpn_comp;
		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
		new_bpc.bpc_nlevel = bpn->bpn_level;
		new_bpc.bpc_private = bpn->bpn_private;
		/*
		 * The caller's pm_sp_misc_t is shared with new_bpc and is
		 * modified in place.
		 * NOTE(review): pspm_errnop is left pointing at this
		 * function's local errno after we return; callers must not
		 * dereference it afterwards.
		 */
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
		    PM_LEVEL_UPONLY;
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
		    &errno;
		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
		    (void *)&new_bpc, result);
		kmem_free(pathbuf, MAXPATHLEN);
		return (ret);

	case BUS_POWER_NOINVOL:
		bpi = (pm_bp_noinvol_t *)arg;
		tdip = bpi->bpni_dip;
		cdip = pm_get_next_descendent(dip, tdip);

		/* In case of rem_drv, the leaf node has been removed */
		if (cdip == NULL)
			return (DDI_SUCCESS);

		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_NOINVOL,
				    ("%s: call bus_power for %s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(cdip)))
				ret = (*PM_BUS_POWER_FUNC(cdip))
				    (cdip, NULL, op, arg, result);
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			} else {
				PMD(PMD_NOINVOL,
				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
				    PM_DEVICE(cdip)))
				ret = pm_busop_bus_power(cdip, NULL, op,
				    arg, result);
				/*
				 * Update the current node.
				 */
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			}
		} else {
			/*
			 * For attach, detach, power up:
			 * Do nothing for leaf node since its
			 * counts are already updated.
			 * For CFB and driver removal, since the
			 * path and the target dip passed in is up to and incl.
			 * the immediate ancestor, need to do the update.
			 */
			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
			    "reached\n", pmf, PM_DEVICE(cdip)))
			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
				(void) pm_noinvol_update_node(cdip, bpi);
			return (DDI_SUCCESS);
		}

	default:
		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
		return (DDI_FAILURE);
	}
}
8471 
/*
 * Carry out a BUS_POWER_CHILD_PWRCHG request on dip itself: change component
 * bpc_comp of dip to level bpc_nlevel, subject to the direction
 * (PM_LEVEL_UPONLY, PM_LEVEL_DOWNONLY or PM_LEVEL_EXACT) found in the
 * request's pm_sp_misc_t.
 *
 * arg		pm_bp_child_pwrchg_t describing the request; its bpc_private
 *		points to a pm_sp_misc_t (canblock, scan, direction, errnop)
 * resultp	points to an int that receives DDI_SUCCESS or DDI_FAILURE
 *
 * Outline:
 *  1. unless the request is EXACT or blocking is bypassed, wait for / match
 *     against a process that directly power manages the device
 *  2. for a BC device, first bring component 0 to its normal power when
 *     another component is being set to a non-zero level
 *  3. pre-notify the parent if it wants notification, else hold its power
 *  4. under the power lock, finish early when the request is already
 *     satisfied or must be refused
 *  5. change the level with power_dev() and, on success, do the console fb
 *     accounting, dependency dispatch, watcher notification and rescan
 *  6. post-notify the parent, or settle the parent's power hold
 *
 * *pspm_errnop is cleared on entry and set to EINVAL, EINTR, EBUSY or EIO on
 * the failure paths below.  Returns DDI_SUCCESS only if both the operation
 * and the parent's post-notification (if any) succeeded; step 1 may instead
 * return the result of pm_busop_match_request() directly.
 */
static int
pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *resultp)
{
	_NOTE(ARGUNUSED(impl_arg))
	PMD_FUNC(pmf, "bp_set_power")
	pm_ppm_devlist_t *devl;
	int clevel, circ;
#ifdef	DEBUG
	int circ_db, ccirc_db;
#endif
	int ret = DDI_SUCCESS;
	dev_info_t *cdip;
	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
	pm_canblock_t canblock = pspm->pspm_canblock;
	int scan = pspm->pspm_scan;
	int comp = bpc->bpc_comp;
	int olevel = bpc->bpc_olevel;
	int nlevel = bpc->bpc_nlevel;
	int comps_off_incr = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int dodeps;
	int direction = pspm->pspm_direction;
	int *errnop = pspm->pspm_errnop;
#ifdef PMDDEBUG
	char *dir = pm_decode_direction(direction);
#endif
	int *iresp = (int *)resultp;
	time_t	idletime, thresh;
	pm_component_t *cp = PM_CP(dip, comp);
	int work_type;

	*iresp = DDI_SUCCESS;
	*errnop = 0;
	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))

	/*
	 * The following set of conditions indicate we are here to handle a
	 * driver's pm_[raise|lower]_power request, but the device is being
	 * power managed (PM_DIRECT_PM) by a user process.  For that case
	 * we want to pm_block and pass a status back to the caller based
	 * on whether the controlling process's next activity on the device
	 * matches the current request or not.  This distinction tells
	 * downstream functions to avoid calling into a driver or changing
	 * the framework's power state.  To actually block, we need:
	 *
	 * PM_ISDIRECT(dip)
	 *	no reason to block unless a process is directly controlling dev
	 * direction != PM_LEVEL_EXACT
	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
	 * !pm_processes_stopped
	 *	don't block if controlling proc already be stopped for cpr
	 * canblock != PM_CANBLOCK_BYPASS
	 *	our caller must not have explicitly prevented blocking
	 */
	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
		PM_LOCK_DIP(dip);
		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
			/* releases dip lock */
			ret = pm_busop_match_request(dip, bpc);
			if (ret == EAGAIN) {
				/* owner let go of the device; check again */
				PM_LOCK_DIP(dip);
				continue;
			}
			return (*iresp = ret);
		}
		PM_UNLOCK_DIP(dip);
	}
	/* BC device is never scanned, so power will stick until we are done */
	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
	    direction != PM_LEVEL_DOWNONLY) {
		int nrmpwr0 = pm_get_normal_power(dip, 0);
		if (pm_set_power(dip, 0, nrmpwr0, direction,
		    canblock, 0, resultp) != DDI_SUCCESS) {
			/* *resultp set by pm_set_power */
			return (DDI_FAILURE);
		}
	}
	if (PM_WANTS_NOTIFICATION(pdip)) {
		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
		if (ret != DDI_SUCCESS) {
			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(pdip)))
			return (DDI_FAILURE);
		}
	} else {
		/*
		 * Since we don't know what the actual power level is,
		 * we place a power hold on the parent no matter what
		 * component and level is changing.
		 */
		pm_hold_power(pdip);
	}
	/* the power lock is held from here until the post_notify section */
	PM_LOCK_POWER(dip, &circ);
	clevel = PM_CURPOWER(dip, comp);
	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
	    clevel, dir))
	/* finish early if the request is already satisfied or is refused */
	switch (direction) {
	case PM_LEVEL_UPONLY:
		/* Powering up */
		if (clevel >= nlevel) {
			PMD(PMD_SET, ("%s: current level is already "
			    "at or above the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		}
		break;
	case PM_LEVEL_EXACT:
		/* specific level request */
		if (clevel == nlevel && !PM_ISBC(dip)) {
			PMD(PMD_SET, ("%s: current level is already "
			    "at the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
			if (!pm_cfb_enabled) {
				PMD(PMD_ERROR | PMD_CFB,
				    ("%s: !pm_cfb_enabled, fails\n", pmf))
				*errnop = EINVAL;
				*iresp = DDI_FAILURE;
				ret = DDI_FAILURE;
				goto post_notify;
			}
			/*
			 * Poll until cfb_inuse clears, dropping pm_cfb_lock
			 * around each delay; a signal aborts the request.
			 */
			mutex_enter(&pm_cfb_lock);
			while (cfb_inuse) {
				mutex_exit(&pm_cfb_lock);
				if (delay_sig(1) == EINTR) {
					ret = DDI_FAILURE;
					*iresp = DDI_FAILURE;
					*errnop = EINTR;
					goto post_notify;
				}
				mutex_enter(&pm_cfb_lock);
			}
			mutex_exit(&pm_cfb_lock);
		}
		break;
	case PM_LEVEL_DOWNONLY:
		/* Powering down */
		thresh = cur_threshold(dip, comp);
		idletime = gethrestime_sec() - cp->pmc_timestamp;
		/*
		 * A scan-initiated power down is refused while the node has
		 * power holds, the component is busy, or it has not been idle
		 * for its threshold (the idle test is skipped for PM_IS_PID
		 * devices).
		 */
		if (scan && ((PM_KUC(dip) != 0) ||
		    (cp->pmc_busycount > 0) ||
		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
#ifdef	DEBUG
			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
				PMD(PMD_SET, ("%s: scan failed: "
				    "kidsupcnt != 0\n", pmf))
			if (cp->pmc_busycount > 0)
				PMD(PMD_SET, ("%s: scan failed: "
				    "device become busy\n", pmf))
			if (idletime < thresh)
				PMD(PMD_SET, ("%s: scan failed: device "
				    "hasn't been idle long enough\n", pmf))
#endif
			*iresp = DDI_FAILURE;
			*errnop = EBUSY;
			ret = DDI_FAILURE;
			goto post_notify;
		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
			PMD(PMD_SET, ("%s: current level is already at "
			    "or below the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		}
		break;
	}

	if (PM_IS_CFB(dip) && (comps_off_incr =
	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
		/*
		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
		 * component from full power.  Remember that we tried to
		 * lower power in case it fails and we need to back out
		 * the adjustment.
		 */
		update_comps_off(comps_off_incr, dip);
		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
		    pm_cfb_comps_off))
	}

	/* make the actual level change; its status becomes *resultp */
	if ((*iresp = power_dev(dip,
	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
#ifdef DEBUG
		/*
		 * All descendents of this node should already be powered off.
		 */
		if (PM_CURPOWER(dip, comp) == 0) {
			pm_desc_pwrchk_t pdpchk;
			pdpchk.pdpc_dip = dip;
			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
			ndi_devi_enter(dip, &circ_db);
			for (cdip = ddi_get_child(dip); cdip != NULL;
			    cdip = ddi_get_next_sibling(cdip)) {
				ndi_devi_enter(cdip, &ccirc_db);
				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
				    (void *)&pdpchk);
				ndi_devi_exit(cdip, ccirc_db);
			}
			ndi_devi_exit(dip, circ_db);
		}
#endif
		/*
		 * Post-adjust pm_cfb_comps_off if we brought an fb component
		 * back up to full power.
		 */
		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
			update_comps_off(comps_off_incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, clevel, nlevel, pm_cfb_comps_off))
		}
		/*
		 * Decide whether dependency work has to be dispatched: for a
		 * BC device only component 0 counts; otherwise powering off
		 * requires every component to be off, and powering on
		 * requires that no other component was already on.
		 * work_type is only meaningful when dodeps is set.
		 */
		dodeps = 0;
		if (POWERING_OFF(clevel, nlevel)) {
			if (PM_ISBC(dip)) {
				dodeps = (comp == 0);
			} else {
				int i;
				dodeps = 1;
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					/* if some component still on */
					if (PM_CURPOWER(dip, i)) {
						dodeps = 0;
						break;
					}
				}
			}
			if (dodeps)
				work_type = PM_DEP_WK_POWER_OFF;
		} else if (POWERING_ON(clevel, nlevel)) {
			if (PM_ISBC(dip)) {
				dodeps = (comp == 0);
			} else {
				int i;
				dodeps = 1;
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					if (i == comp)
						continue;
					if (PM_CURPOWER(dip, i) > 0) {
						dodeps = 0;
						break;
					}
				}
			}
			if (dodeps)
				work_type = PM_DEP_WK_POWER_ON;
		}

		if (dodeps) {
			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			(void) ddi_pathname(dip, pathbuf);
			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
			    PM_DEP_NOWAIT, NULL, 0);
			kmem_free(pathbuf, MAXPATHLEN);
		}
		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
			int old;

			/* If old power cached during deadlock, use it. */
			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
			    cp->pmc_phc_pwr : olevel);
			mutex_enter(&pm_rsvp_lock);
			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
			    old, canblock);
			pm_enqueue_notify_others(&devl, canblock);
			mutex_exit(&pm_rsvp_lock);
		}

		/*
		 * If we are coming from a scan, don't do it again,
		 * else we can have infinite loops.
		 */
		if (!scan)
			pm_rescan(dip);
	} else {
		/* if we incremented pm_comps_off_count, but failed */
		if (comps_off_incr > 0) {
			update_comps_off(-comps_off_incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, clevel, nlevel, pm_cfb_comps_off))
		}
		*errnop = EIO;
	}

post_notify:
	/*
	 * This thread may have been in deadlock with pm_power_has_changed.
	 * Before releasing power lock, clear the flag which marks this
	 * condition.
	 */
	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;

	/*
	 * Update the old power level in the bus power structure with the
	 * actual power level before the transition was made to the new level.
	 * Some involved parents depend on this information to keep track of
	 * their children's power transition.
	 */
	if (*iresp != DDI_FAILURE)
		bpc->bpc_olevel = clevel;

	if (PM_WANTS_NOTIFICATION(pdip)) {
		/* the parent is notified while the power lock is still held */
		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
		PM_UNLOCK_POWER(dip, circ);
		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
		    PM_DEVICE(dip), ret))
	} else {
		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
		PM_UNLOCK_POWER(dip, circ);
		/*
		 * Now that we know what power transition has occurred
		 * (if any), release the power hold.  Leave the hold
		 * in effect in the case of OFF->ON transition.
		 */
		if (!(clevel == 0 && nlevel > 0 &&
		    (!PM_ISBC(dip) || comp == 0)))
			pm_rele_power(pdip);
		/*
		 * If the power transition was an ON->OFF transition,
		 * remove the power hold from the parent.
		 */
		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
			pm_rele_power(pdip);
	}
	/* both the operation and the post-notification must have succeeded */
	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
		return (DDI_FAILURE);
	else
		return (DDI_SUCCESS);
}
8818 
8819 /*
8820  * If an app (SunVTS or Xsun) has taken control, then block until it
8821  * gives it up or makes the requested power level change, unless
8822  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8823  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8824  */
8825 static int
8826 pm_busop_match_request(dev_info_t *dip, void *arg)
8827 {
8828 	PMD_FUNC(pmf, "bp_match_request")
8829 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8830 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8831 	int comp = bpc->bpc_comp;
8832 	int nlevel = bpc->bpc_nlevel;
8833 	pm_canblock_t canblock = pspm->pspm_canblock;
8834 	int direction = pspm->pspm_direction;
8835 	int clevel, circ;
8836 
8837 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8838 	PM_LOCK_POWER(dip, &circ);
8839 	clevel = PM_CURPOWER(dip, comp);
8840 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8841 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8842 	if (direction == PM_LEVEL_UPONLY) {
8843 		if (clevel >= nlevel) {
8844 			PM_UNLOCK_POWER(dip, circ);
8845 			PM_UNLOCK_DIP(dip);
8846 			return (DDI_SUCCESS);
8847 		}
8848 	} else if (clevel == nlevel) {
8849 		PM_UNLOCK_POWER(dip, circ);
8850 		PM_UNLOCK_DIP(dip);
8851 		return (DDI_SUCCESS);
8852 	}
8853 	if (canblock == PM_CANBLOCK_FAIL) {
8854 		PM_UNLOCK_POWER(dip, circ);
8855 		PM_UNLOCK_DIP(dip);
8856 		return (DDI_FAILURE);
8857 	}
8858 	if (canblock == PM_CANBLOCK_BLOCK) {
8859 		/*
8860 		 * To avoid a deadlock, we must not hold the
8861 		 * power lock when we pm_block.
8862 		 */
8863 		PM_UNLOCK_POWER(dip, circ);
8864 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8865 		    /* pm_block releases dip lock */
8866 		    switch (pm_block(dip, comp, nlevel, clevel)) {
8867 		    case PMP_RELEASE:
8868 				return (EAGAIN);
8869 		    case PMP_SUCCEED:
8870 				return (DDI_SUCCESS);
8871 		    case PMP_FAIL:
8872 				return (DDI_FAILURE);
8873 		    }
8874 	} else {
8875 		ASSERT(0);
8876 	}
8877 	_NOTE(NOTREACHED);
8878 	return (DDI_FAILURE);	/* keep gcc happy */
8879 }
8880 
8881 static int
8882 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8883 {
8884 	PMD_FUNC(pmf, "all_to_normal_nexus")
8885 	int		*normal;
8886 	int		i, ncomps;
8887 	size_t		size;
8888 	int		changefailed = 0;
8889 	int		ret, result = DDI_SUCCESS;
8890 	pm_bp_nexus_pwrup_t	bpn;
8891 	pm_sp_misc_t	pspm;
8892 
8893 	ASSERT(PM_GET_PM_INFO(dip));
8894 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8895 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8896 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8897 		return (DDI_FAILURE);
8898 	}
8899 	ncomps = PM_NUMCMPTS(dip);
8900 	for (i = 0; i < ncomps; i++) {
8901 		bpn.bpn_dip = dip;
8902 		bpn.bpn_comp = i;
8903 		bpn.bpn_level = normal[i];
8904 		pspm.pspm_canblock = canblock;
8905 		pspm.pspm_scan = 0;
8906 		bpn.bpn_private = &pspm;
8907 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8908 		    (void *)&bpn, (void *)&result);
8909 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8910 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8911 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8912 			    i, normal[i], result))
8913 			changefailed++;
8914 		}
8915 	}
8916 	kmem_free(normal, size);
8917 	if (changefailed) {
8918 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8919 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8920 		return (DDI_FAILURE);
8921 	}
8922 	return (DDI_SUCCESS);
8923 }
8924 
8925 int
8926 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8927     dev_info_t *tdip)
8928 {
8929 	PMD_FUNC(pmf, "noinvol_update")
8930 	pm_bp_noinvol_t args;
8931 	int ret;
8932 	int result = DDI_SUCCESS;
8933 
8934 	args.bpni_path = path;
8935 	args.bpni_dip = tdip;
8936 	args.bpni_cmd = subcmd;
8937 	args.bpni_wasvolpmd = wasvolpmd;
8938 	args.bpni_volpmd = volpmd;
8939 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8940 	    "volpmd %d wasvolpmd %d\n", pmf,
8941 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
8942 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8943 	    &args, &result);
8944 	return (ret);
8945 }
8946 
8947 void
8948 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
8949 {
8950 	PMD_FUNC(pmf, "noinvol_update_node")
8951 
8952 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8953 	switch (req->bpni_cmd) {
8954 	case PM_BP_NOINVOL_ATTACH:
8955 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
8956 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8957 		    DEVI(dip)->devi_pm_noinvolpm,
8958 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8959 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8960 		PM_LOCK_DIP(dip);
8961 		DEVI(dip)->devi_pm_noinvolpm--;
8962 		if (req->bpni_wasvolpmd) {
8963 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
8964 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8965 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8966 			    DEVI(dip)->devi_pm_volpmd - 1))
8967 			if (DEVI(dip)->devi_pm_volpmd)
8968 				DEVI(dip)->devi_pm_volpmd--;
8969 		}
8970 		PM_UNLOCK_DIP(dip);
8971 		break;
8972 
8973 	case PM_BP_NOINVOL_DETACH:
8974 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
8975 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
8976 		    DEVI(dip)->devi_pm_noinvolpm,
8977 		    DEVI(dip)->devi_pm_noinvolpm + 1))
8978 		PM_LOCK_DIP(dip);
8979 		DEVI(dip)->devi_pm_noinvolpm++;
8980 		if (req->bpni_wasvolpmd) {
8981 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
8982 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8983 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8984 			    DEVI(dip)->devi_pm_volpmd + 1))
8985 			DEVI(dip)->devi_pm_volpmd++;
8986 		}
8987 		PM_UNLOCK_DIP(dip);
8988 		break;
8989 
8990 	case PM_BP_NOINVOL_REMDRV:
8991 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8992 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8993 		    DEVI(dip)->devi_pm_noinvolpm,
8994 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8995 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8996 		PM_LOCK_DIP(dip);
8997 		DEVI(dip)->devi_pm_noinvolpm--;
8998 		if (req->bpni_wasvolpmd) {
8999 			PMD(PMD_NOINVOL,
9000 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9001 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9002 			    DEVI(dip)->devi_pm_volpmd,
9003 			    DEVI(dip)->devi_pm_volpmd - 1))
9004 			/*
9005 			 * A power up could come in between and
9006 			 * clear the volpmd, if that's the case,
9007 			 * volpmd would be clear.
9008 			 */
9009 			if (DEVI(dip)->devi_pm_volpmd)
9010 				DEVI(dip)->devi_pm_volpmd--;
9011 		}
9012 		PM_UNLOCK_DIP(dip);
9013 		break;
9014 
9015 	case PM_BP_NOINVOL_CFB:
9016 		PMD(PMD_NOINVOL,
9017 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9018 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9019 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9020 		PM_LOCK_DIP(dip);
9021 		DEVI(dip)->devi_pm_noinvolpm++;
9022 		PM_UNLOCK_DIP(dip);
9023 		break;
9024 
9025 	case PM_BP_NOINVOL_POWER:
9026 		PMD(PMD_NOINVOL,
9027 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9028 		    pmf, PM_DEVICE(dip),
9029 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9030 		    req->bpni_volpmd))
9031 		PM_LOCK_DIP(dip);
9032 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9033 		PM_UNLOCK_DIP(dip);
9034 		break;
9035 
9036 	default:
9037 		break;
9038 	}
9039 
9040 }
9041 
#ifdef DEBUG
/*
 * Walk callback (DEBUG kernels only).  arg is a pm_desc_pwrchk_t naming the
 * node that is powering off (pdpc_dip).  For every component of dip whose
 * current power level is non-zero a message is logged through cmn_err(),
 * at CE_PANIC severity when pdpc_par_involved is zero and at CE_WARN
 * otherwise.  Nodes without pm info are passed over.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int comp, level, severity;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* This component is still powered: report it. */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while "
			    "desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp,
			    level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9071 
9072 /*
9073  * Record the fact that one thread is borrowing the lock on a device node.
9074  * Use is restricted to the case where the lending thread will block until
9075  * the borrowing thread (always curthread) completes.
9076  */
9077 void
9078 pm_borrow_lock(kthread_t *lender)
9079 {
9080 	lock_loan_t *prev = &lock_loan_head;
9081 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9082 
9083 	cur->pmlk_borrower = curthread;
9084 	cur->pmlk_lender = lender;
9085 	mutex_enter(&pm_loan_lock);
9086 	cur->pmlk_next = prev->pmlk_next;
9087 	prev->pmlk_next = cur;
9088 	mutex_exit(&pm_loan_lock);
9089 }
9090 
9091 /*
9092  * Return the borrowed lock.  A thread can borrow only one.
9093  */
9094 void
9095 pm_return_lock(void)
9096 {
9097 	lock_loan_t *cur;
9098 	lock_loan_t *prev = &lock_loan_head;
9099 
9100 	mutex_enter(&pm_loan_lock);
9101 	ASSERT(prev->pmlk_next != NULL);
9102 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9103 		if (cur->pmlk_borrower == curthread)
9104 			break;
9105 
9106 	ASSERT(cur != NULL);
9107 	prev->pmlk_next = cur->pmlk_next;
9108 	mutex_exit(&pm_loan_lock);
9109 	kmem_free(cur, sizeof (*cur));
9110 }
9111