xref: /illumos-gate/usr/src/uts/common/os/sunpm.c (revision 618b6b99eb6eee4272ca949f5ac45efb4425f02c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the voyager platform are known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)). The exception to this rule is that power
65  * manageable CPU devices may be automatically managed independently of autopm
66  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
67  * ioctls) cpupm. If the CPU devices are not managed independently, then they
68  * are managed by autopm. In either case, for automatically power managed
69  * devices, all components are considered independent of each other, and it is
70  * up to the driver to decide when a transition requires saving or restoring
71  * hardware state.
72  *
73  * Each device component also has a threshold time associated with each power
74  * transition (see power.conf(4)), and a busy/idle state maintained by the
75  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
76  * Components are created idle.
77  *
78  * The PM framework provides several functions:
79  * -implement PM policy as described in power.conf(4)
80  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
81  *  Policies consist of:
82  *    -set threshold values (defaults if none provided by pmconfig)
83  *    -set dependencies among devices
84  *    -enable/disable autopm
85  *    -enable/disable cpupm
86  *    -turn down idle components based on thresholds (if autopm or cpupm is
87  *     enabled) (aka scanning)
88  *    -maintain power states based on dependencies among devices
89  *    -upon request, or when the frame buffer powers off, attempt to turn off
90  *     all components that are idle or become idle over the next (10 sec)
91  *     period in an attempt to get down to an EnergyStar compliant state
92  *    -prevent powering off of a device which exported the
93  *     pm-no-involuntary-power-cycles property without active involvement of
94  *     the device's driver (so no removing power when the device driver is
95  *     not attached)
96  * -provide a mechanism for a device driver to request that a device's component
97  *  be brought back to the power level necessary for the use of the device
98  * -allow a process to directly control the power levels of device components
99  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
100  * -ensure that the console frame buffer is powered up before being referenced
101  *  via prom_printf() or other prom calls that might generate console output
102  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
103  * -provide "backwards compatible" behavior for devices without pm-components
104  *  property
105  *
106  * Scanning:
107  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
108  * component of each managed device to its lowest power based on the threshold
109  * of idleness associated with each transition and the busy/idle state of the
110  * component.
111  *
112  * The actual work of this is done by pm_scan_dev(), which cycles through each
113  * component of a device, checking its idleness against its current threshold,
114  * and calling pm_set_power() as appropriate to change the power level.
115  * This function also indicates when it would next be profitable to scan the
116  * device again, and a new scan is scheduled after that time.
117  *
118  * Dependencies:
119  * It is possible to establish a dependency between the power states of two
120  * otherwise unrelated devices.  This is currently done to ensure that the
121  * cdrom is always up whenever the console framebuffer is up, so that the user
122  * can insert a cdrom and see a popup as a result.
123  *
124  * The dependency terminology used in power.conf(4) is not easy to understand,
125  * so we've adopted a different terminology in the implementation.  We write
126  * of a "keeps up" and a "kept up" device.  A relationship can be established
127  * where one device keeps up another.  That means that if the keepsup device
128  * has any component that is at a non-zero power level, all components of the
129  * "kept up" device must be brought to full power.  This relationship is
130  * asynchronous.  When the keeping device is powered up, a request is queued
131  * to a worker thread to bring up the kept device.  The caller does not wait.
132  * Scan will not turn down a kept up device.
133  *
134  * Direct PM:
135  * A device may be directly power managed by a process.  If a device is
136  * directly pm'd, then it will not be scanned, and dependencies will not be
137  * enforced.  If a directly pm'd device's driver requests a power change (via
138  * pm_raise_power(9F)), then the request is blocked and notification is sent
139  * to the controlling process, which must issue the requested power change for
140  * the driver to proceed.
141  *
142  */
143 
144 #include <sys/types.h>
145 #include <sys/errno.h>
146 #include <sys/callb.h>		/* callback registration during CPR */
147 #include <sys/conf.h>		/* driver flags and functions */
148 #include <sys/open.h>		/* OTYP_CHR definition */
149 #include <sys/stat.h>		/* S_IFCHR definition */
150 #include <sys/pathname.h>	/* name -> dev_info xlation */
151 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
152 #include <sys/kmem.h>		/* memory alloc stuff */
153 #include <sys/debug.h>
154 #include <sys/archsystm.h>
155 #include <sys/pm.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/sunpm.h>
160 #include <sys/epm.h>
161 #include <sys/vfs.h>
162 #include <sys/mode.h>
163 #include <sys/mkdev.h>
164 #include <sys/promif.h>
165 #include <sys/consdev.h>
166 #include <sys/esunddi.h>
167 #include <sys/modctl.h>
168 #include <sys/fs/ufs_fs.h>
169 #include <sys/note.h>
170 #include <sys/taskq.h>
171 #include <sys/bootconf.h>
172 #include <sys/reboot.h>
173 #include <sys/spl.h>
174 #include <sys/disp.h>
175 #include <sys/sobject.h>
176 #include <sys/sunmdi.h>
177 
178 
179 /*
180  * PM LOCKING
181  *	The list of locks:
182  * Global pm mutex locks.
183  *
184  * pm_scan_lock:
185  *		It protects the timeout id of the scan thread, and the value
186  *		of autopm_enabled and cpupm.  This lock is not held
187  *		concurrently with any other PM locks.
188  *
189  * pm_clone_lock:	Protects the clone list and count of poll events
190  *		pending for the pm driver.
191  *		Lock ordering:
192  *			pm_clone_lock -> pm_pscc_interest_rwlock,
193  *			pm_clone_lock -> pm_pscc_direct_rwlock.
194  *
195  * pm_rsvp_lock:
196  *		Used to synchronize the data structures used for processes
197  *		to rendezvous with state change information when doing
198  *		direct PM.
199  *		Lock ordering:
200  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
201  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
202  *			pm_rsvp_lock -> pm_clone_lock.
203  *
204  * ppm_lock:	protects the list of registered ppm drivers
205  *		Lock ordering:
206  *			ppm_lock -> ppm driver unit_lock
207  *
208  * pm_compcnt_lock:
209  *		Protects count of components that are not at their lowest
210  *		power level.
211  *		Lock ordering:
212  *			pm_compcnt_lock -> ppm_lock.
213  *
214  * pm_dep_thread_lock:
215  *		Protects work list for pm_dep_thread.  Not taken concurrently
216  *		with any other pm lock.
217  *
218  * pm_remdrv_lock:
219  *		Serializes the operation of removing noinvol data structure
220  *		entries for a branch of the tree when a driver has been
221  *		removed from the system (modctl_rem_major).
222  *		Lock ordering:
223  *			pm_remdrv_lock -> pm_noinvol_rwlock.
224  *
225  * pm_cfb_lock: (High level spin lock)
226  *		Protects the count of how many components of the console
227  *		frame buffer are off (so we know if we have to bring up the
228  *		console as a result of a prom_printf, etc.)
229  *		No other locks are taken while holding this lock.
230  *
231  * pm_loan_lock:
232  *		Protects the lock_loan list.  List is used to record that one
233  *		thread has acquired a power lock but has launched another thread
234  *		to complete its processing.  An entry in the list indicates that
235  *		the worker thread can borrow the lock held by the other thread,
236  *		which must block on the completion of the worker.  Use is
237  *		specific to module loading.
238  *		No other locks are taken while holding this lock.
239  *
240  * Global PM rwlocks
241  *
242  * pm_thresh_rwlock:
243  *		Protects the list of thresholds recorded for future use (when
244  *		devices attach).
245  *		Lock ordering:
246  *			pm_thresh_rwlock -> devi_pm_lock
247  *
248  * pm_noinvol_rwlock:
249  *		Protects list of detached nodes that had noinvol registered.
250  *		No other PM locks are taken while holding pm_noinvol_rwlock.
251  *
252  * pm_pscc_direct_rwlock:
253  *		Protects the list that maps devices being directly power
254  *		managed to the processes that manage them.
255  *		Lock ordering:
256  *			pm_pscc_direct_rwlock -> psce_lock
257  *
258  * pm_pscc_interest_rwlock:
259  *		Protects the list that maps state change events to processes
260  *		that want to know about them.
261  *		Lock ordering:
262  *			pm_pscc_interest_rwlock -> psce_lock
263  *
264  * per-dip locks:
265  *
266  * Each node has these per-dip locks, which are only used if the device is
267  * a candidate for power management (e.g. has pm components)
268  *
269  * devi_pm_lock:
270  *		Protects all power management state of the node except for
271  *		power level, which is protected by ndi_devi_enter().
272  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
273  *		Lock ordering:
274  *			devi_pm_lock -> pm_rsvp_lock,
275  *			devi_pm_lock -> pm_dep_thread_lock,
276  *			devi_pm_lock -> pm_noinvol_rwlock,
277  *			devi_pm_lock -> power lock
278  *
279  * power lock (ndi_devi_enter()):
280  *		Since changing power level is possibly a slow operation (30
281  *		seconds to spin up a disk drive), this is locked separately.
282  *		Since a call into the driver to change the power level of one
283  *		component may result in a call back into the framework to change
284  *		the power level of another, this lock allows re-entrancy by
285  *		the same thread (ndi_devi_enter is used for this because
286  *		the USB framework uses ndi_devi_enter in its power entry point,
287  *		and use of any other lock would produce a deadlock).
288  *
289  * devi_pm_busy_lock:
290  *		This lock protects the integrity of the busy count.  It is
291  *		only taken by pm_busy_component() and pm_idle_component and
292  *		some code that adjust the busy time after the timer gets set
293  *		up or after a CPR operation.  It is per-dip to keep from
294  *		single-threading all the disk drivers on a system.
295  *		It could be per component instead, but most devices have
296  *		only one component.
297  *		No other PM locks are taken while holding this lock.
298  *
299  */
300 
301 static int stdout_is_framebuffer;
302 static kmutex_t	e_pm_power_lock;
303 static kmutex_t pm_loan_lock;
304 kmutex_t	pm_scan_lock;
305 callb_id_t	pm_cpr_cb_id;
306 callb_id_t	pm_panic_cb_id;
307 callb_id_t	pm_halt_cb_id;
308 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
309 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
310 
311 clock_t pm_min_scan = PM_MIN_SCAN;
312 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
313 
314 static int pm_busop_set_power(dev_info_t *,
315     void *, pm_bus_power_op_t, void *, void *);
316 static int pm_busop_match_request(dev_info_t *, void *);
317 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
318 static void e_pm_set_max_power(dev_info_t *, int, int);
319 static int e_pm_get_max_power(dev_info_t *, int);
320 
321 /*
322  * Dependency processing is done through a separate thread.
323  */
324 kmutex_t	pm_dep_thread_lock;
325 kcondvar_t	pm_dep_thread_cv;
326 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
327 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
328 
329 /*
330  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
331  * power managing things in single user mode that have been suppressed via
332  * power.conf entries.  Protected by pm_scan_lock.
333  */
334 int		autopm_enabled;
335 
336 /*
337  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
338  * to define the power management behavior of CPU devices separate from
339  * autopm. Protected by pm_scan_lock.
340  */
341 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
342 
343 /*
344  * This flag is true while processes are stopped for a checkpoint/resume.
345  * Controlling processes of direct pm'd devices are not available to
346  * participate in power level changes, so we bypass them when this is set.
347  */
348 static int	pm_processes_stopped;
349 
350 #ifdef	DEBUG
351 
352 /*
353  * see common/sys/epm.h for PMD_* values
354  */
355 uint_t		pm_debug = 0;
356 
357 /*
358  * If pm_divertdebug is set, then no prom_printf calls will be made by
359  * PMD(), which will prevent debug output from bringing up the console
360  * frame buffer.  Clearing this variable before setting pm_debug will result
361  * in PMD output going to the console.
362  *
363  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
364  * deadlocks and decremented at the end of pm_set_power()
365  */
366 uint_t		pm_divertdebug = 1;
367 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
368 
369 void prdeps(char *);
370 #endif
371 
372 /* Globals */
373 
374 /*
375  * List of recorded thresholds and dependencies
376  */
377 pm_thresh_rec_t *pm_thresh_head;
378 krwlock_t pm_thresh_rwlock;
379 
380 pm_pdr_t *pm_dep_head;
381 static int pm_unresolved_deps = 0;
382 static int pm_prop_deps = 0;
383 
384 /*
385  * List of devices that exported no-involuntary-power-cycles property
386  */
387 pm_noinvol_t *pm_noinvol_head;
388 
389 /*
390  * Locks used in noinvol processing
391  */
392 krwlock_t pm_noinvol_rwlock;
393 kmutex_t pm_remdrv_lock;
394 
395 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
396 int pm_system_idle_threshold;
397 int pm_cpu_idle_threshold;
398 
399 /*
400  * By default nexus has 0 threshold, and depends on its children to keep it up
401  */
402 int pm_default_nexus_threshold = 0;
403 
404 /*
405  * Data structures shared with common/io/pm.c
406  */
407 kmutex_t	pm_clone_lock;
408 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
409 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
410 unsigned char	pm_interest[PM_MAX_CLONE];
411 struct pollhead	pm_pollhead;
412 
413 extern int	hz;
414 extern char	*platform_module_list[];
415 
416 /*
417  * Wrappers for use in ddi_walk_devs
418  */
419 
420 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
421 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
422 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
423 static int		pm_discard_dep_walk(dev_info_t *, void *);
424 #ifdef DEBUG
425 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
426 #endif
427 
428 /*
429  * Routines for managing noinvol devices
430  */
431 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
432 void			pm_noinvol_update_node(dev_info_t *,
433 			    pm_bp_noinvol_t *req);
434 
435 kmutex_t pm_rsvp_lock;
436 kmutex_t pm_compcnt_lock;
437 krwlock_t pm_pscc_direct_rwlock;
438 krwlock_t pm_pscc_interest_rwlock;
439 
440 #define	PSC_INTEREST	0	/* belongs to interest psc list */
441 #define	PSC_DIRECT	1	/* belongs to direct psc list */
442 
443 pscc_t *pm_pscc_interest;
444 pscc_t *pm_pscc_direct;
445 
446 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
447 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
448 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
449 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
450 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
451 
/*
 * True when dip takes part in the pm_comps_notlowest accounting: either it
 * is not a nexus, or it has PMC_DEV_THRESH or PMC_COMP_THRESH set in its
 * devi_pm_flags.
 */
#define	PM_NOTLOWEST_TRACKED(dip)					\
	(!PM_IS_NEXUS(dip) ||						\
	(DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH)))

/*
 * Account for one more component that is not at its lowest power level.
 * Done under pm_compcnt_lock.  When the count is about to leave zero,
 * pm_ppm_notify_all_lowest() is called with PM_NOT_ALL_LOWEST first.
 * The expansion refers to a variable named pmf in the PMD() call, so the
 * calling function has to provide one (see PMD_FUNC).
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (PM_NOTLOWEST_TRACKED(dip)) {				\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}

/*
 * Account for one fewer component that is not at its lowest power level.
 * Done under pm_compcnt_lock.  When the count drops to zero,
 * pm_ppm_notify_all_lowest() is called with PM_ALL_LOWEST.
 * As with PM_INCR_NOTLOWEST, the caller must have pmf in scope.
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (PM_NOTLOWEST_TRACKED(dip)) {				\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
477 
478 /*
479  * console frame-buffer power-management is not enabled when
480  * debugging services are present.  to override, set pm_cfb_override
481  * to non-zero.
482  */
483 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
484 kmutex_t pm_cfb_lock;
485 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
486 #ifdef DEBUG
487 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
488 #else
489 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
490 #endif
491 
492 static dev_info_t *cfb_dip = 0;
493 static dev_info_t *cfb_dip_detaching = 0;
494 uint_t cfb_inuse = 0;
495 static ddi_softintr_t pm_soft_id;
496 static clock_t pm_soft_pending;
497 int	pm_scans_disabled = 0;
498 
/*
 * A structure to record the fact that one thread has borrowed a lock held
 * by another thread.  The context requires that the lender block on the
 * completion of the borrower.
 *
 * Entries are chained through pmlk_next; lock_loan_head below is a dummy
 * first element, so the first real entry (if any) is lock_loan_head.pmlk_next.
 */
typedef struct lock_loan {
	struct lock_loan	*pmlk_next;	/* next entry in the list */
	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
	kthread_t		*pmlk_lender;	/* thread that holds the lock */
	dev_info_t		*pmlk_dip;	/* presumably the node whose */
						/* lock is on loan - confirm */
} lock_loan_t;
static lock_loan_t lock_loan_head;	/* list head is a dummy element */
511 
512 #ifdef	DEBUG
513 #ifdef PMDDEBUG
514 #define	PMD_FUNC(func, name)	char *(func) = (name);
515 #else
516 #define	PMD_FUNC(func, name)
517 #endif
518 #else
519 #define	PMD_FUNC(func, name)
520 #endif
521 
522 
523 /*
524  * Must be called before first device (including pseudo) attach
525  */
526 void
527 pm_init_locks(void)
528 {
529 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
530 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
531 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
532 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
533 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
534 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
535 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
536 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
537 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
538 }
539 
540 static boolean_t
541 pm_cpr_callb(void *arg, int code)
542 {
543 	_NOTE(ARGUNUSED(arg))
544 	static int auto_save;
545 	static pm_cpupm_t cpupm_save;
546 	static int pm_reset_timestamps(dev_info_t *, void *);
547 
548 	switch (code) {
549 	case CB_CODE_CPR_CHKPT:
550 		/*
551 		 * Cancel scan or wait for scan in progress to finish
552 		 * Other threads may be trying to restart the scan, so we
553 		 * have to keep at it unil it sticks
554 		 */
555 		mutex_enter(&pm_scan_lock);
556 		ASSERT(!pm_scans_disabled);
557 		pm_scans_disabled = 1;
558 		auto_save = autopm_enabled;
559 		autopm_enabled = 0;
560 		cpupm_save = cpupm;
561 		cpupm = PM_CPUPM_NOTSET;
562 		mutex_exit(&pm_scan_lock);
563 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
564 		break;
565 
566 	case CB_CODE_CPR_RESUME:
567 		ASSERT(!autopm_enabled);
568 		ASSERT(cpupm == PM_CPUPM_NOTSET);
569 		ASSERT(pm_scans_disabled);
570 		pm_scans_disabled = 0;
571 		/*
572 		 * Call pm_reset_timestamps to reset timestamps of each
573 		 * device to the time when the system is resumed so that their
574 		 * idleness can be re-calculated. That's to avoid devices from
575 		 * being powered down right after resume if the system was in
576 		 * suspended mode long enough.
577 		 */
578 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
579 
580 		autopm_enabled = auto_save;
581 		cpupm = cpupm_save;
582 		/*
583 		 * If there is any auto-pm device, get the scanning
584 		 * going. Otherwise don't bother.
585 		 */
586 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
587 		break;
588 	}
589 	return (B_TRUE);
590 }
591 
592 /*
593  * This callback routine is called when there is a system panic.  This function
594  * exists for prototype matching.
595  */
596 static boolean_t
597 pm_panic_callb(void *arg, int code)
598 {
599 	_NOTE(ARGUNUSED(arg, code))
600 	void pm_cfb_check_and_powerup(void);
601 	PMD(PMD_CFB, ("pm_panic_callb\n"))
602 	pm_cfb_check_and_powerup();
603 	return (B_TRUE);
604 }
605 
606 static boolean_t
607 pm_halt_callb(void *arg, int code)
608 {
609 	_NOTE(ARGUNUSED(arg, code))
610 	return (B_TRUE);	/* XXX for now */
611 }
612 
613 /*
614  * This needs to be called after the root and platform drivers are loaded
615  * and be single-threaded with respect to driver attach/detach
616  */
617 void
618 pm_init(void)
619 {
620 	PMD_FUNC(pmf, "pm_init")
621 	char **mod;
622 	extern pri_t minclsyspri;
623 	static void pm_dep_thread(void);
624 
625 	pm_comps_notlowest = 0;
626 	pm_system_idle_threshold = pm_default_idle_threshold;
627 	pm_cpu_idle_threshold = 0;
628 
629 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
630 	    CB_CL_CPR_PM, "pm_cpr");
631 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
632 	    CB_CL_PANIC, "pm_panic");
633 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
634 	    CB_CL_HALT, "pm_halt");
635 
636 	/*
637 	 * Create a thread to do dependency processing.
638 	 */
639 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
640 	    TS_RUN, minclsyspri);
641 
642 	/*
643 	 * loadrootmodules already loaded these ppm drivers, now get them
644 	 * attached so they can claim the root drivers as they attach
645 	 */
646 	for (mod = platform_module_list; *mod; mod++) {
647 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
648 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
649 			    *mod);
650 		} else {
651 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
652 			    ddi_major_to_name(ddi_name_to_major(*mod))))
653 		}
654 	}
655 }
656 
657 /*
658  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
659  * enabled) when device becomes power managed or after a failed detach and
660  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
661  * a CPR resume to get all the devices scanning again.
662  */
663 void
664 pm_scan_init(dev_info_t *dip)
665 {
666 	PMD_FUNC(pmf, "scan_init")
667 	pm_scan_t	*scanp;
668 
669 	ASSERT(!PM_ISBC(dip));
670 
671 	PM_LOCK_DIP(dip);
672 	scanp = PM_GET_PM_SCAN(dip);
673 	if (!scanp) {
674 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
675 		    pmf, PM_DEVICE(dip)))
676 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
677 		DEVI(dip)->devi_pm_scan = scanp;
678 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
679 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
680 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
681 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
682 	}
683 	PM_UNLOCK_DIP(dip);
684 }
685 
686 /*
687  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
688  */
689 void
690 pm_scan_fini(dev_info_t *dip)
691 {
692 	PMD_FUNC(pmf, "scan_fini")
693 	pm_scan_t	*scanp;
694 
695 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
696 	ASSERT(!PM_ISBC(dip));
697 	PM_LOCK_DIP(dip);
698 	scanp = PM_GET_PM_SCAN(dip);
699 	if (!scanp) {
700 		PM_UNLOCK_DIP(dip);
701 		return;
702 	}
703 
704 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
705 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
706 
707 	kmem_free(scanp, sizeof (pm_scan_t));
708 	DEVI(dip)->devi_pm_scan = NULL;
709 	PM_UNLOCK_DIP(dip);
710 }
711 
712 /*
713  * Given a pointer to a component struct, return the current power level
714  * (struct contains index unless it is a continuous level).
715  * Located here in hopes of getting both this and dev_is_needed into the
716  * cache together
717  */
718 static int
719 cur_power(pm_component_t *cp)
720 {
721 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
722 		return (cp->pmc_cur_pwr);
723 
724 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
725 }
726 
727 static char *
728 pm_decode_direction(int direction)
729 {
730 	switch (direction) {
731 	case PM_LEVEL_UPONLY:
732 		return ("up");
733 
734 	case PM_LEVEL_EXACT:
735 		return ("exact");
736 
737 	case PM_LEVEL_DOWNONLY:
738 		return ("down");
739 
740 	default:
741 		return ("INVALID DIRECTION");
742 	}
743 }
744 
745 char *
746 pm_decode_op(pm_bus_power_op_t op)
747 {
748 	switch (op) {
749 	case BUS_POWER_CHILD_PWRCHG:
750 		return ("CHILD_PWRCHG");
751 	case BUS_POWER_NEXUS_PWRUP:
752 		return ("NEXUS_PWRUP");
753 	case BUS_POWER_PRE_NOTIFICATION:
754 		return ("PRE_NOTIFICATION");
755 	case BUS_POWER_POST_NOTIFICATION:
756 		return ("POST_NOTIFICATION");
757 	case BUS_POWER_HAS_CHANGED:
758 		return ("HAS_CHANGED");
759 	case BUS_POWER_NOINVOL:
760 		return ("NOINVOL");
761 	default:
762 		return ("UNKNOWN OP");
763 	}
764 }
765 
766 /*
767  * Returns true if level is a possible (valid) power level for component
768  */
769 int
770 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
771 {
772 	PMD_FUNC(pmf, "e_pm_valid_power")
773 	pm_component_t *cp = PM_CP(dip, cmpt);
774 	int i;
775 	int *ip = cp->pmc_comp.pmc_lvals;
776 	int limit = cp->pmc_comp.pmc_numlevels;
777 
778 	if (level < 0)
779 		return (0);
780 	for (i = 0; i < limit; i++) {
781 		if (level == *ip++)
782 			return (1);
783 	}
784 #ifdef DEBUG
785 	if (pm_debug & PMD_FAIL) {
786 		ip = cp->pmc_comp.pmc_lvals;
787 
788 		for (i = 0; i < limit; i++)
789 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
790 			    pmf, i, *ip++))
791 	}
792 #endif
793 	return (0);
794 }
795 
796 /*
797  * Returns true if device is pm'd (after calling pm_start if need be)
798  */
799 int
800 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
801 {
802 	pm_info_t *info;
803 	static int pm_start(dev_info_t *dip);
804 
805 	/*
806 	 * Check if the device is power managed if not.
807 	 * To make the common case (device is power managed already)
808 	 * fast, we check without the lock.  If device is not already
809 	 * power managed, then we take the lock and the long route through
810 	 * go get it managed.  Devices never go unmanaged until they
811 	 * detach.
812 	 */
813 	info = PM_GET_PM_INFO(dip);
814 	if (!info) {
815 		if (!DEVI_IS_ATTACHING(dip)) {
816 			return (0);
817 		}
818 		if (pm_start(dip) != DDI_SUCCESS) {
819 			return (0);
820 		}
821 		info = PM_GET_PM_INFO(dip);
822 	}
823 	ASSERT(info);
824 	if (infop != NULL)
825 		*infop = info;
826 	return (1);
827 }
828 
829 int
830 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
831 {
832 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
833 		if (cpp != NULL)
834 			*cpp = PM_CP(dip, cmpt);
835 		return (1);
836 	} else {
837 		return (0);
838 	}
839 }
840 
841 /*
842  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
843  */
844 static int
845 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
846 {
847 	PMD_FUNC(pmf, "din")
848 	pm_component_t *cp;
849 	char *pathbuf;
850 	int result;
851 
852 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
853 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
854 	    !e_pm_valid_power(dip, cmpt, level))
855 		return (DDI_FAILURE);
856 
857 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
858 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
859 	    level, cur_power(cp)))
860 
861 	if (pm_set_power(dip, cmpt, level,  direction,
862 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
863 		if (direction == PM_LEVEL_UPONLY) {
864 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
865 			(void) ddi_pathname(dip, pathbuf);
866 			cmn_err(CE_WARN, "Device %s failed to power up.",
867 			    pathbuf);
868 			kmem_free(pathbuf, MAXPATHLEN);
869 		}
870 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
871 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
872 		    pm_decode_direction(direction), level, result))
873 		return (DDI_FAILURE);
874 	}
875 
876 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
877 	    PM_DEVICE(dip)))
878 	pm_rescan(dip);
879 	return (DDI_SUCCESS);
880 }
881 
/*
 * pm_rescan - request a scan of the device whose dev_info_t is passed in arg.
 *
 * We can get multiple pm_rescan() threads; if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * The argument is a void * because this function is also handed to
 * timeout() as a callback (see below).  All scan state is examined and
 * modified under PM_LOCK_DIP(dip).
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/*
	 * Nothing to do if scans are globally disabled, the device is not
	 * scannable, its pm/scan state is not set up, or a stop was requested.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress: just ask it to go around again */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; claim its id and cancel it.
		 * The dip lock is dropped across untimeout() -- presumably
		 * because the callback being cancelled is pm_rescan itself,
		 * which takes this same lock (NOTE(review): confirm).
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have
		 * scheduled a timeout in the meantime; cancel that one too.
		 */
		if (scanp->ps_scan_id) {
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			/*
			 * If yet another timeout appeared while we were
			 * unlocked, let it stand instead of adding our own.
			 */
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/*
		 * Retry via timeout: after pm_id_ticks when idledown is in
		 * effect, otherwise after pm_min_scan seconds.
		 */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (pm_min_scan * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* pm_scan was queued; remember that until it starts running */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
970 
/*
 * pm_scan - scan worker for one device; arg is its dev_info_t.
 *
 * pm_rescan() queues this function on system_taskq.  It runs
 * pm_scan_dev() (repeating while PM_SCAN_AGAIN gets set by other threads),
 * with PM_SCANNING held so that only one thread scans a device at a time,
 * and finally schedules the next pm_rescan() timeout based on the delay
 * pm_scan_dev() returned.  Scan state is protected by PM_LOCK_DIP(dip),
 * which is dropped while pm_scan_dev() runs.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/*
	 * Scanning not allowed right now: drop the "again" and "dispatched"
	 * requests so that waiters (see pm_scan_stop()) can make progress.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread already scanning make another pass */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	/* we are now the scanning thread; the dispatch has been consumed */
	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	do {
		/*
		 * Clear PM_SCAN_AGAIN before each pass; if anyone sets it
		 * while the lock is dropped for pm_scan_dev(), loop again.
		 */
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot exceed LONG_MAX */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* somebody else already scheduled a rescan; keep it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() wants ticks */
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1050 
1051 void
1052 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1053 {
1054 	int components = PM_NUMCMPTS(dip);
1055 	int i;
1056 
1057 	ASSERT(components > 0);
1058 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1059 	for (i = 0; i < components; i++) {
1060 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1061 	}
1062 	PM_UNLOCK_BUSY(dip);
1063 }
1064 
1065 /*
1066  * Returns true if device needs to be kept up because it exported the
1067  * "no-involuntary-power-cycles" property or we're pretending it did (console
1068  * fb case) or it is an ancestor of such a device and has used up the "one
1069  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1070  * upon detach
1071  */
1072 int
1073 pm_noinvol(dev_info_t *dip)
1074 {
1075 	PMD_FUNC(pmf, "noinvol")
1076 
1077 	/*
1078 	 * This doesn't change over the life of a driver, so no locking needed
1079 	 */
1080 	if (PM_IS_CFB(dip)) {
1081 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1082 		    pmf, PM_DEVICE(dip)))
1083 		return (1);
1084 	}
1085 	/*
1086 	 * Not an issue if no such kids
1087 	 */
1088 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1089 #ifdef DEBUG
1090 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1091 			dev_info_t *pdip = dip;
1092 			do {
1093 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1094 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1095 				    DEVI(pdip)->devi_pm_noinvolpm,
1096 				    DEVI(pdip)->devi_pm_volpmd))
1097 				pdip = ddi_get_parent(pdip);
1098 			} while (pdip);
1099 		}
1100 #endif
1101 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1102 		return (0);
1103 	}
1104 
1105 	/*
1106 	 * Since we now maintain the counts correct at every node, we no longer
1107 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1108 	 * without the children getting their counts adjusted.
1109 	 */
1110 
1111 #ifdef	DEBUG
1112 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1113 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1114 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1115 		    PM_DEVICE(dip)))
1116 #endif
1117 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1118 }
1119 
1120 /*
1121  * This function performs the actual scanning of the device.
1122  * It attempts to power off the indicated device's components if they have
1123  * been idle and other restrictions are met.
1124  * pm_scan_dev calculates and returns when the next scan should happen for
1125  * this device.
1126  */
1127 time_t
1128 pm_scan_dev(dev_info_t *dip)
1129 {
1130 	PMD_FUNC(pmf, "scan_dev")
1131 	pm_scan_t	*scanp;
1132 	time_t		*timestamp, idletime, now, thresh;
1133 	time_t		timeleft = 0;
1134 #ifdef PMDDEBUG
1135 	int		curpwr;
1136 #endif
1137 	int		i, nxtpwr, pwrndx, unused;
1138 	size_t		size;
1139 	pm_component_t	 *cp;
1140 	dev_info_t	*pdip = ddi_get_parent(dip);
1141 	int		circ;
1142 	static int	cur_threshold(dev_info_t *, int);
1143 	static int	pm_next_lower_power(pm_component_t *, int);
1144 
1145 	/*
1146 	 * skip attaching device
1147 	 */
1148 	if (DEVI_IS_ATTACHING(dip)) {
1149 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1150 		    pmf, PM_DEVICE(dip), pm_min_scan))
1151 		return (pm_min_scan);
1152 	}
1153 
1154 	PM_LOCK_DIP(dip);
1155 	scanp = PM_GET_PM_SCAN(dip);
1156 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1157 
1158 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1159 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1160 	    PM_KUC(dip)))
1161 
1162 	/* no scan under the following conditions */
1163 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1164 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1165 	    (PM_KUC(dip) != 0) ||
1166 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1167 		PM_UNLOCK_DIP(dip);
1168 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1169 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1170 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1171 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1172 		    cpupm, PM_KUC(dip),
1173 		    PM_ISDIRECT(dip) ? "is" : "is not",
1174 		    pm_noinvol(dip) ? "is" : "is not"))
1175 		return (LONG_MAX);
1176 	}
1177 	PM_UNLOCK_DIP(dip);
1178 
1179 	if (!ndi_devi_tryenter(pdip, &circ)) {
1180 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1181 		    pmf, PM_DEVICE(pdip)))
1182 		return ((time_t)1);
1183 	}
1184 	now = gethrestime_sec();
1185 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1186 	timestamp = kmem_alloc(size, KM_SLEEP);
1187 	pm_get_timestamps(dip, timestamp);
1188 
1189 	/*
1190 	 * Since we removed support for backwards compatible devices,
1191 	 * (see big comment at top of file)
1192 	 * it is no longer required to deal with component 0 last.
1193 	 */
1194 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1195 		/*
1196 		 * If already off (an optimization, perhaps)
1197 		 */
1198 		cp = PM_CP(dip, i);
1199 		pwrndx = cp->pmc_cur_pwr;
1200 #ifdef PMDDEBUG
1201 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1202 		    PM_LEVEL_UNKNOWN :
1203 		    cp->pmc_comp.pmc_lvals[pwrndx];
1204 #endif
1205 
1206 		if (pwrndx == 0) {
1207 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1208 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1209 			/* skip device if off or at its lowest */
1210 			continue;
1211 		}
1212 
1213 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1214 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1215 			/* were busy or newly became busy by another thread */
1216 			if (timeleft == 0)
1217 				timeleft = max(thresh, pm_min_scan);
1218 			else
1219 				timeleft = min(
1220 				    timeleft, max(thresh, pm_min_scan));
1221 			continue;
1222 		}
1223 
1224 		idletime = now - timestamp[i];		/* idle time */
1225 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1226 		    pmf, PM_DEVICE(dip), i, idletime))
1227 		if (idletime >= thresh || PM_IS_PID(dip)) {
1228 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1229 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1230 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1231 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1232 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1233 			    PM_CURPOWER(dip, i) != nxtpwr) {
1234 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1235 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1236 				    i, curpwr, nxtpwr))
1237 				timeleft = pm_min_scan;
1238 				continue;
1239 			} else {
1240 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1241 				    "%d->%d, GOOD curpwr %d\n", pmf,
1242 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1243 				    cur_power(cp)))
1244 
1245 				if (nxtpwr == 0)	/* component went off */
1246 					continue;
1247 
1248 				/*
1249 				 * scan to next lower level
1250 				 */
1251 				if (timeleft == 0)
1252 					timeleft = max(
1253 					    1, cur_threshold(dip, i));
1254 				else
1255 					timeleft = min(timeleft,
1256 					    max(1, cur_threshold(dip, i)));
1257 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1258 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1259 				    i, timeleft))
1260 			}
1261 		} else {	/* comp not idle long enough */
1262 			if (timeleft == 0)
1263 				timeleft = thresh - idletime;
1264 			else
1265 				timeleft = min(timeleft, (thresh - idletime));
1266 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1267 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1268 		}
1269 	}
1270 	ndi_devi_exit(pdip, circ);
1271 	kmem_free(timestamp, size);
1272 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1273 	    PM_DEVICE(dip), timeleft))
1274 
1275 	/*
1276 	 * if components are already at lowest level, timeleft is left 0
1277 	 */
1278 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1279 }
1280 
1281 /*
1282  * pm_scan_stop - cancel scheduled pm_rescan,
1283  *                wait for termination of dispatched pm_scan thread
1284  *                     and active pm_scan_dev thread.
1285  */
1286 void
1287 pm_scan_stop(dev_info_t *dip)
1288 {
1289 	PMD_FUNC(pmf, "scan_stop")
1290 	pm_scan_t	*scanp;
1291 	timeout_id_t	scanid;
1292 
1293 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1294 	PM_LOCK_DIP(dip);
1295 	scanp = PM_GET_PM_SCAN(dip);
1296 	if (!scanp) {
1297 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1298 		    pmf, PM_DEVICE(dip)))
1299 		PM_UNLOCK_DIP(dip);
1300 		return;
1301 	}
1302 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1303 
1304 	/* cancel scheduled scan taskq */
1305 	while (scanp->ps_scan_id) {
1306 		scanid = scanp->ps_scan_id;
1307 		scanp->ps_scan_id = 0;
1308 		PM_UNLOCK_DIP(dip);
1309 		(void) untimeout(scanid);
1310 		PM_LOCK_DIP(dip);
1311 	}
1312 
1313 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1314 		PM_UNLOCK_DIP(dip);
1315 		delay(1);
1316 		PM_LOCK_DIP(dip);
1317 	}
1318 	PM_UNLOCK_DIP(dip);
1319 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1320 }
1321 
1322 int
1323 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1324 {
1325 	_NOTE(ARGUNUSED(arg))
1326 
1327 	if (!PM_GET_PM_SCAN(dip))
1328 		return (DDI_WALK_CONTINUE);
1329 	ASSERT(!PM_ISBC(dip));
1330 	pm_scan_stop(dip);
1331 	return (DDI_WALK_CONTINUE);
1332 }
1333 
1334 /*
1335  * Converts a power level value to its index
1336  */
1337 static int
1338 power_val_to_index(pm_component_t *cp, int val)
1339 {
1340 	int limit, i, *ip;
1341 
1342 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1343 	    val != PM_LEVEL_EXACT);
1344 	/*  convert power value into index (i) */
1345 	limit = cp->pmc_comp.pmc_numlevels;
1346 	ip = cp->pmc_comp.pmc_lvals;
1347 	for (i = 0; i < limit; i++)
1348 		if (val == *ip++)
1349 			return (i);
1350 	return (-1);
1351 }
1352 
1353 /*
1354  * Converts a numeric power level to a printable string
1355  */
1356 static char *
1357 power_val_to_string(pm_component_t *cp, int val)
1358 {
1359 	int index;
1360 
1361 	if (val == PM_LEVEL_UPONLY)
1362 		return ("<UPONLY>");
1363 
1364 	if (val == PM_LEVEL_UNKNOWN ||
1365 	    (index = power_val_to_index(cp, val)) == -1)
1366 		return ("<LEVEL_UNKNOWN>");
1367 
1368 	return (cp->pmc_comp.pmc_lnames[index]);
1369 }
1370 
1371 /*
1372  * Return true if this node has been claimed by a ppm.
1373  */
1374 static int
1375 pm_ppm_claimed(dev_info_t *dip)
1376 {
1377 	return (PPM(dip) != NULL);
1378 }
1379 
1380 /*
1381  * A node which was voluntarily power managed has just used up its "free cycle"
1382  * and need is volpmd field cleared, and the same done to all its descendents
1383  */
1384 static void
1385 pm_clear_volpm_dip(dev_info_t *dip)
1386 {
1387 	PMD_FUNC(pmf, "clear_volpm_dip")
1388 
1389 	if (dip == NULL)
1390 		return;
1391 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1392 	    PM_DEVICE(dip)))
1393 	DEVI(dip)->devi_pm_volpmd = 0;
1394 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1395 		pm_clear_volpm_dip(dip);
1396 	}
1397 }
1398 
1399 /*
1400  * A node which was voluntarily power managed has used up the "free cycles"
1401  * for the subtree that it is the root of.  Scan through the list of detached
1402  * nodes and adjust the counts of any that are descendents of the node.
1403  */
1404 static void
1405 pm_clear_volpm_list(dev_info_t *dip)
1406 {
1407 	PMD_FUNC(pmf, "clear_volpm_list")
1408 	char	*pathbuf;
1409 	size_t	len;
1410 	pm_noinvol_t *ip;
1411 
1412 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1413 	(void) ddi_pathname(dip, pathbuf);
1414 	len = strlen(pathbuf);
1415 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1416 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1417 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1418 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1419 		    ip->ni_path))
1420 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1421 		    ip->ni_path[len] == '/') {
1422 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1423 			    ip->ni_path))
1424 			ip->ni_volpmd = 0;
1425 			ip->ni_wasvolpmd = 0;
1426 		}
1427 	}
1428 	kmem_free(pathbuf, MAXPATHLEN);
1429 	rw_exit(&pm_noinvol_rwlock);
1430 }
1431 
1432 /*
1433  * Powers a device, suspending or resuming the driver if it is a backward
1434  * compatible device, calling into ppm to change power level.
1435  * Called with the component's power lock held.
1436  */
1437 static int
1438 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1439     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1440 {
1441 	PMD_FUNC(pmf, "power_dev")
1442 	power_req_t power_req;
1443 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1444 	int		resume_needed = 0;
1445 	int		suspended = 0;
1446 	int		result;
1447 #ifdef PMDDEBUG
1448 	struct pm_component *cp = PM_CP(dip, comp);
1449 #endif
1450 	int		bc = PM_ISBC(dip);
1451 	int pm_all_components_off(dev_info_t *);
1452 	int		clearvolpmd = 0;
1453 	char		pathbuf[MAXNAMELEN];
1454 #ifdef PMDDEBUG
1455 	char *ppmname, *ppmaddr;
1456 #endif
1457 	/*
1458 	 * If this is comp 0 of a backwards compat device and we are
1459 	 * going to take the power away, we need to detach it with
1460 	 * DDI_PM_SUSPEND command.
1461 	 */
1462 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1463 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1464 			/* We could not suspend before turning cmpt zero off */
1465 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1466 			    pmf, PM_DEVICE(dip)))
1467 			return (DDI_FAILURE);
1468 		} else {
1469 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1470 			suspended++;
1471 		}
1472 	}
1473 	power_req.request_type = PMR_PPM_SET_POWER;
1474 	power_req.req.ppm_set_power_req.who = dip;
1475 	power_req.req.ppm_set_power_req.cmpt = comp;
1476 	power_req.req.ppm_set_power_req.old_level = old_level;
1477 	power_req.req.ppm_set_power_req.new_level = level;
1478 	power_req.req.ppm_set_power_req.canblock = canblock;
1479 	power_req.req.ppm_set_power_req.cookie = NULL;
1480 #ifdef PMDDEBUG
1481 	if (pm_ppm_claimed(dip)) {
1482 		ppmname = PM_NAME(PPM(dip));
1483 		ppmaddr = PM_ADDR(PPM(dip));
1484 
1485 	} else {
1486 		ppmname = "noppm";
1487 		ppmaddr = "0";
1488 	}
1489 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1490 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1491 	    power_val_to_string(cp, old_level), old_level,
1492 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1493 #endif
1494 	/*
1495 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1496 	 * bc device comp 0 is powering on, then we count it as a power cycle
1497 	 * against its voluntary count.
1498 	 */
1499 	if (DEVI(dip)->devi_pm_volpmd &&
1500 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1501 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1502 		clearvolpmd = 1;
1503 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1504 	    &power_req, &result)) == DDI_SUCCESS) {
1505 		/*
1506 		 * Now do involuntary pm accounting;  If we've just cycled power
1507 		 * on a voluntarily pm'd node, and by inference on its entire
1508 		 * subtree, we need to set the subtree (including those nodes
1509 		 * already detached) volpmd counts to 0, and subtract out the
1510 		 * value of the current node's volpmd count from the ancestors
1511 		 */
1512 		if (clearvolpmd) {
1513 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1514 			pm_clear_volpm_dip(dip);
1515 			pm_clear_volpm_list(dip);
1516 			if (volpmd) {
1517 				(void) ddi_pathname(dip, pathbuf);
1518 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1519 				    volpmd, 0, pathbuf, dip);
1520 			}
1521 		}
1522 	} else {
1523 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1524 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1525 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1526 	}
1527 	/*
1528 	 * If some other devices were also powered up (e.g. other cpus in
1529 	 * the same domain) return a pointer to that list
1530 	 */
1531 	if (devlist) {
1532 		*devlist = (pm_ppm_devlist_t *)
1533 		    power_req.req.ppm_set_power_req.cookie;
1534 	}
1535 	/*
1536 	 * We will have to resume the device if the device is backwards compat
1537 	 * device and either of the following is true:
1538 	 * -This is comp 0 and we have successfully powered it up
1539 	 * -This is comp 0 and we have failed to power it down. Resume is
1540 	 *  needed because we have suspended it above
1541 	 */
1542 
1543 	if (bc && comp == 0) {
1544 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1545 		if (power_op_ret == DDI_SUCCESS) {
1546 			if (POWERING_ON(old_level, level)) {
1547 				/*
1548 				 * It must be either suspended or resumed
1549 				 * via pm_power_has_changed path
1550 				 */
1551 				ASSERT((DEVI(dip)->devi_pm_flags &
1552 				    PMC_SUSPENDED) ||
1553 				    (PM_CP(dip, comp)->pmc_flags &
1554 				    PM_PHC_WHILE_SET_POWER));
1555 
1556 					resume_needed = suspended;
1557 			}
1558 		} else {
1559 			if (POWERING_OFF(old_level, level)) {
1560 				/*
1561 				 * It must be either suspended or resumed
1562 				 * via pm_power_has_changed path
1563 				 */
1564 				ASSERT((DEVI(dip)->devi_pm_flags &
1565 				    PMC_SUSPENDED) ||
1566 				    (PM_CP(dip, comp)->pmc_flags &
1567 				    PM_PHC_WHILE_SET_POWER));
1568 
1569 					resume_needed = suspended;
1570 			}
1571 		}
1572 	}
1573 	if (resume_needed) {
1574 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1575 		/* ppm is not interested in DDI_PM_RESUME */
1576 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1577 		    DDI_SUCCESS) {
1578 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1579 		} else
1580 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1581 			    PM_DEVICE(dip));
1582 	}
1583 	return (power_op_ret);
1584 }
1585 
1586 /*
1587  * Return true if we are the owner or a borrower of the devi lock.  See
1588  * pm_lock_power_single() about borrowing the lock.
1589  */
1590 static int
1591 pm_devi_lock_held(dev_info_t *dip)
1592 {
1593 	lock_loan_t *cur;
1594 
1595 	if (DEVI_BUSY_OWNED(dip))
1596 		return (1);
1597 
1598 	/* return false if no locks borrowed */
1599 	if (lock_loan_head.pmlk_next == NULL)
1600 		return (0);
1601 
1602 	mutex_enter(&pm_loan_lock);
1603 	/* see if our thread is registered as a lock borrower. */
1604 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1605 		if (cur->pmlk_borrower == curthread)
1606 			break;
1607 	mutex_exit(&pm_loan_lock);
1608 
1609 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1610 }
1611 
1612 /*
1613  * pm_set_power: adjusts power level of device.	 Assumes device is power
1614  * manageable & component exists.
1615  *
1616  * Cases which require us to bring up devices we keep up ("wekeepups") for
1617  * backwards compatible devices:
1618  *	component 0 is off and we're bringing it up from 0
1619  *		bring up wekeepup first
1620  *	and recursively when component 0 is off and we bring some other
1621  *	component up from 0
1622  * For devices which are not backward compatible, our dependency notion is much
1623  * simpler.  Unless all components are off, then wekeeps must be on.
1624  * We don't treat component 0 differently.
1625  * Canblock tells how to deal with a direct pm'd device.
1626  * Scan arg tells us if we were called from scan, in which case we don't need
1627  * to go back to the root node and walk down to change power.
1628  */
1629 int
1630 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1631     pm_canblock_t canblock, int scan, int *retp)
1632 {
1633 	PMD_FUNC(pmf, "set_power")
1634 	char		*pathbuf;
1635 	pm_bp_child_pwrchg_t bpc;
1636 	pm_sp_misc_t	pspm;
1637 	int		ret = DDI_SUCCESS;
1638 	int		unused = DDI_SUCCESS;
1639 	dev_info_t	*pdip = ddi_get_parent(dip);
1640 
1641 #ifdef DEBUG
1642 	int		diverted = 0;
1643 
1644 	/*
1645 	 * This prevents operations on the console from calling prom_printf and
1646 	 * either deadlocking or bringing up the console because of debug
1647 	 * output
1648 	 */
1649 	if (dip == cfb_dip) {
1650 		diverted++;
1651 		mutex_enter(&pm_debug_lock);
1652 		pm_divertdebug++;
1653 		mutex_exit(&pm_debug_lock);
1654 	}
1655 #endif
1656 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1657 	    direction == PM_LEVEL_EXACT);
1658 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1659 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1660 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1661 	(void) ddi_pathname(dip, pathbuf);
1662 	bpc.bpc_dip = dip;
1663 	bpc.bpc_path = pathbuf;
1664 	bpc.bpc_comp = comp;
1665 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1666 	bpc.bpc_nlevel = level;
1667 	pspm.pspm_direction = direction;
1668 	pspm.pspm_errnop = retp;
1669 	pspm.pspm_canblock = canblock;
1670 	pspm.pspm_scan = scan;
1671 	bpc.bpc_private = &pspm;
1672 
1673 	/*
1674 	 * If a config operation is being done (we've locked the parent) or
1675 	 * we already hold the power lock (we've locked the node)
1676 	 * then we can operate directly on the node because we have already
1677 	 * brought up all the ancestors, otherwise, we have to go back to the
1678 	 * top of the tree.
1679 	 */
1680 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1681 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1682 		    (void *)&bpc, (void *)&unused);
1683 	else
1684 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1685 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1686 #ifdef DEBUG
1687 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1688 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1689 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1690 	}
1691 	if (diverted) {
1692 		mutex_enter(&pm_debug_lock);
1693 		pm_divertdebug--;
1694 		mutex_exit(&pm_debug_lock);
1695 	}
1696 #endif
1697 	kmem_free(pathbuf, MAXPATHLEN);
1698 	return (ret);
1699 }
1700 
1701 /*
1702  * If holddip is set, then if a dip is found we return with the node held.
1703  *
1704  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1705  * (resolve_pathname), but it does not drive attach.
1706  */
1707 dev_info_t *
1708 pm_name_to_dip(char *pathname, int holddip)
1709 {
1710 	struct pathname pn;
1711 	char		*component;
1712 	dev_info_t	*parent, *child;
1713 	int		circ;
1714 
1715 	if ((pathname == NULL) || (*pathname != '/'))
1716 		return (NULL);
1717 
1718 	/* setup pathname and allocate component */
1719 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1720 		return (NULL);
1721 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1722 
1723 	/* start at top, process '/' component */
1724 	parent = child = ddi_root_node();
1725 	ndi_hold_devi(parent);
1726 	pn_skipslash(&pn);
1727 	ASSERT(i_ddi_devi_attached(parent));
1728 
1729 	/* process components of pathname */
1730 	while (pn_pathleft(&pn)) {
1731 		(void) pn_getcomponent(&pn, component);
1732 
1733 		/* enter parent and search for component child */
1734 		ndi_devi_enter(parent, &circ);
1735 		child = ndi_devi_findchild(parent, component);
1736 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1737 			child = NULL;
1738 			ndi_devi_exit(parent, circ);
1739 			ndi_rele_devi(parent);
1740 			goto out;
1741 		}
1742 
1743 		/* attached child found, hold child and release parent */
1744 		ndi_hold_devi(child);
1745 		ndi_devi_exit(parent, circ);
1746 		ndi_rele_devi(parent);
1747 
1748 		/* child becomes parent, and process next component */
1749 		parent = child;
1750 		pn_skipslash(&pn);
1751 
1752 		/* loop with active ndi_devi_hold of child->parent */
1753 	}
1754 
1755 out:
1756 	pn_free(&pn);
1757 	kmem_free(component, MAXNAMELEN);
1758 
1759 	/* if we are not asked to return with hold, drop current hold */
1760 	if (child && !holddip)
1761 		ndi_rele_devi(child);
1762 	return (child);
1763 }
1764 
1765 /*
1766  * Search for a dependency and mark it unsatisfied
1767  */
1768 static void
1769 pm_unsatisfy(char *keeper, char *kept)
1770 {
1771 	PMD_FUNC(pmf, "unsatisfy")
1772 	pm_pdr_t *dp;
1773 
1774 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1775 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1776 		if (!dp->pdr_isprop) {
1777 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1778 			    (dp->pdr_kept_count > 0) &&
1779 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1780 				if (dp->pdr_satisfied) {
1781 					dp->pdr_satisfied = 0;
1782 					pm_unresolved_deps++;
1783 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1784 					    "pm_unresolved_deps now %d\n", pmf,
1785 					    pm_unresolved_deps))
1786 				}
1787 			}
1788 		}
1789 	}
1790 }
1791 
/*
 * Device "keeper" is being un power managed; it keeps up "count" other
 * devices, whose paths are given in keptpaths[].
 * We need to release any hold we have on the kept devices, and also
 * mark each dependency no longer satisfied.
 *
 * pwr is consulted only when the keeper node can no longer be found (or has
 * no pm info): a non-zero pwr then stands in for "the keeper was powered
 * on".
 */
static void
pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
{
	PMD_FUNC(pmf, "unkeeps")
	int i, j;
	dev_info_t *kept;
	dev_info_t *dip;
	struct pm_component *cp;
	int keeper_on = 0, circ;

	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
	    keeper, (void *)keptpaths))
	/*
	 * Try to grab keeper. Keeper may have gone away by now,
	 * in this case, use the passed in value pwr
	 */
	dip = pm_name_to_dip(keeper, 1);
	for (i = 0; i < count; i++) {
		/* Release power hold */
		kept = pm_name_to_dip(keptpaths[i], 1);
		if (kept) {
			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
			    PM_DEVICE(kept), i))
			/*
			 * We need to check if we skipped a bringup here
			 * because we could have failed the bringup
			 * (ie DIRECT PM device) and have
			 * not increment the count.
			 */
			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
				/*
				 * The keeper counts as "on" if any of its
				 * components has a non-zero current power;
				 * this is evaluated under its power lock.
				 */
				keeper_on = 0;
				PM_LOCK_POWER(dip, &circ);
				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
					cp = &DEVI(dip)->devi_pm_components[j];
					if (cur_power(cp)) {
						keeper_on++;
						break;
					}
				}
				if (keeper_on && (PM_SKBU(kept) == 0)) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
				PM_UNLOCK_POWER(dip, circ);
			} else if (pwr) {
				/* keeper is gone: trust the caller's pwr */
				if (PM_SKBU(kept) == 0) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
			}
			/* drop the hold taken by pm_name_to_dip() above */
			ddi_release_devi(kept);
		}
		/*
		 * mark this dependency not satisfied
		 */
		pm_unsatisfy(keeper, keptpaths[i]);
	}
	/* drop the hold on the keeper, if it was found */
	if (dip)
		ddi_release_devi(dip);
}
1859 
1860 /*
1861  * Device kept is being un power managed, it is kept up by keeper.
1862  * We need to mark the dependency no longer satisfied.
1863  */
1864 static void
1865 pm_unkepts(char *kept, char *keeper)
1866 {
1867 	PMD_FUNC(pmf, "unkepts")
1868 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1869 	ASSERT(keeper != NULL);
1870 	/*
1871 	 * mark this dependency not satisfied
1872 	 */
1873 	pm_unsatisfy(keeper, kept);
1874 }
1875 
1876 /*
1877  * Removes dependency information and hold on the kepts, if the path is a
1878  * path of a keeper.
1879  */
1880 static void
1881 pm_free_keeper(char *path, int pwr)
1882 {
1883 	pm_pdr_t *dp;
1884 	int i;
1885 	size_t length;
1886 
1887 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1888 		if (strcmp(dp->pdr_keeper, path) != 0)
1889 			continue;
1890 		/*
1891 		 * Remove all our kept holds and the dependency records,
1892 		 * then free up the kept lists.
1893 		 */
1894 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1895 		if (dp->pdr_kept_count)  {
1896 			for (i = 0; i < dp->pdr_kept_count; i++) {
1897 				length = strlen(dp->pdr_kept_paths[i]);
1898 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1899 			}
1900 			kmem_free(dp->pdr_kept_paths,
1901 			    dp->pdr_kept_count * sizeof (char **));
1902 			dp->pdr_kept_paths = NULL;
1903 			dp->pdr_kept_count = 0;
1904 		}
1905 	}
1906 }
1907 
1908 /*
1909  * Removes the device represented by path from the list of kepts, if the
1910  * path is a path of a kept
1911  */
1912 static void
1913 pm_free_kept(char *path)
1914 {
1915 	pm_pdr_t *dp;
1916 	int i;
1917 	int j, count;
1918 	size_t length;
1919 	char **paths;
1920 
1921 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1922 		if (dp->pdr_kept_count == 0)
1923 			continue;
1924 		count = dp->pdr_kept_count;
1925 		/* Remove this device from the kept path lists */
1926 		for (i = 0; i < count; i++) {
1927 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1928 				pm_unkepts(path, dp->pdr_keeper);
1929 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1930 				kmem_free(dp->pdr_kept_paths[i], length);
1931 				dp->pdr_kept_paths[i] = NULL;
1932 				dp->pdr_kept_count--;
1933 			}
1934 		}
1935 		/* Compact the kept paths array */
1936 		if (dp->pdr_kept_count) {
1937 			length = dp->pdr_kept_count * sizeof (char **);
1938 			paths = kmem_zalloc(length, KM_SLEEP);
1939 			j = 0;
1940 			for (i = 0; i < count; i++) {
1941 				if (dp->pdr_kept_paths[i] != NULL) {
1942 					paths[j] = dp->pdr_kept_paths[i];
1943 					j++;
1944 				}
1945 			}
1946 			ASSERT(j == dp->pdr_kept_count);
1947 		}
1948 		/* Now free the old array and point to the new one */
1949 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1950 		if (dp->pdr_kept_count)
1951 			dp->pdr_kept_paths = paths;
1952 		else
1953 			dp->pdr_kept_paths = NULL;
1954 	}
1955 }
1956 
1957 /*
1958  * Free the dependency information for a device.
1959  */
1960 void
1961 pm_free_keeps(char *path, int pwr)
1962 {
1963 	PMD_FUNC(pmf, "free_keeps")
1964 
1965 #ifdef DEBUG
1966 	int doprdeps = 0;
1967 	void prdeps(char *);
1968 
1969 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
1970 	if (pm_debug & PMD_KEEPS) {
1971 		doprdeps = 1;
1972 		prdeps("pm_free_keeps before");
1973 	}
1974 #endif
1975 	/*
1976 	 * First assume we are a keeper and remove all our kepts.
1977 	 */
1978 	pm_free_keeper(path, pwr);
1979 	/*
1980 	 * Now assume we a kept device, and remove all our records.
1981 	 */
1982 	pm_free_kept(path);
1983 #ifdef	DEBUG
1984 	if (doprdeps) {
1985 		prdeps("pm_free_keeps after");
1986 	}
1987 #endif
1988 }
1989 
1990 static int
1991 pm_is_kept(char *path)
1992 {
1993 	pm_pdr_t *dp;
1994 	int i;
1995 
1996 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1997 		if (dp->pdr_kept_count == 0)
1998 			continue;
1999 		for (i = 0; i < dp->pdr_kept_count; i++) {
2000 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2001 				return (1);
2002 		}
2003 	}
2004 	return (0);
2005 }
2006 
2007 static void
2008 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2009 {
2010 	PMD_FUNC(pmf, "hold_rele_power")
2011 	int circ;
2012 
2013 	if ((dip == NULL) ||
2014 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2015 		return;
2016 
2017 	PM_LOCK_POWER(dip, &circ);
2018 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2019 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2020 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2021 
2022 	PM_KUC(dip) += cnt;
2023 
2024 	ASSERT(PM_KUC(dip) >= 0);
2025 	PM_UNLOCK_POWER(dip, circ);
2026 
2027 	if (cnt < 0 && PM_KUC(dip) == 0)
2028 		pm_rescan(dip);
2029 }
2030 
/*
 * Maximum number of ppm handlers; the callback table below is declared
 * with one slot more than this.
 */
#define	MAX_PPM_HANDLERS	4

/* Held by pm_ppm_notify_all_lowest() while it walks ppm_callbacks[]. */
kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of registered ppm handlers.  Walkers stop at the first entry whose
 * ppmc_func is NULL, so the extra (+1) slot presumably guarantees that a
 * terminating entry always exists -- verify against the registration code.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);	/* handler; NULL ends the table */
	dev_info_t	*ppmc_dip;	/* dip passed to pm_ctlops() as the ppm */
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2039 
2040 
2041 /*
2042  * This routine calls into all the registered ppms to notify them
2043  * that either all components of power-managed devices are at their
2044  * lowest levels or no longer all are at their lowest levels.
2045  */
2046 static void
2047 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2048 {
2049 	struct ppm_callbacks *ppmcp;
2050 	power_req_t power_req;
2051 	int result = 0;
2052 
2053 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2054 	power_req.req.ppm_all_lowest_req.mode = mode;
2055 	mutex_enter(&ppm_lock);
2056 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2057 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2058 		    DDI_CTLOPS_POWER, &power_req, &result);
2059 	mutex_exit(&ppm_lock);
2060 }
2061 
2062 static void
2063 pm_set_pm_info(dev_info_t *dip, void *value)
2064 {
2065 	DEVI(dip)->devi_pm_info = value;
2066 }
2067 
/*
 * Head of the list of blocked requests, linked through pr_next.  It is
 * walked by pm_rsvp_lookup() and pm_proceed() with pm_rsvp_lock held.
 */
pm_rsvp_t *pm_blocked_list;
2069 
2070 /*
2071  * Look up an entry in the blocked list by dip and component
2072  */
2073 static pm_rsvp_t *
2074 pm_rsvp_lookup(dev_info_t *dip, int comp)
2075 {
2076 	pm_rsvp_t *p;
2077 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2078 	for (p = pm_blocked_list; p; p = p->pr_next)
2079 		if (p->pr_dip == dip && p->pr_comp == comp) {
2080 			return (p);
2081 		}
2082 	return (NULL);
2083 }
2084 
2085 /*
2086  * Called when a device which is direct power managed (or the parent or
2087  * dependent of such a device) changes power, or when a pm clone is closed
2088  * that was direct power managing a device.  This call results in pm_blocked()
2089  * (below) returning.
2090  */
2091 void
2092 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2093 {
2094 	PMD_FUNC(pmf, "proceed")
2095 	pm_rsvp_t *found = NULL;
2096 	pm_rsvp_t *p;
2097 
2098 	mutex_enter(&pm_rsvp_lock);
2099 	switch (cmd) {
2100 	/*
2101 	 * we're giving up control, let any pending op continue
2102 	 */
2103 	case PMP_RELEASE:
2104 		for (p = pm_blocked_list; p; p = p->pr_next) {
2105 			if (dip == p->pr_dip) {
2106 				p->pr_retval = PMP_RELEASE;
2107 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2108 				    pmf, PM_DEVICE(dip)))
2109 				cv_signal(&p->pr_cv);
2110 			}
2111 		}
2112 		break;
2113 
2114 	/*
2115 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2116 	 * succeed and a non-matching request for the same device fail
2117 	 */
2118 	case PMP_SETPOWER:
2119 		found = pm_rsvp_lookup(dip, comp);
2120 		if (!found)	/* if driver not waiting */
2121 			break;
2122 		/*
2123 		 * This cannot be pm_lower_power, since that can only happen
2124 		 * during detach or probe
2125 		 */
2126 		if (found->pr_newlevel <= newlevel) {
2127 			found->pr_retval = PMP_SUCCEED;
2128 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2129 			    PM_DEVICE(dip)))
2130 		} else {
2131 			found->pr_retval = PMP_FAIL;
2132 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2133 			    PM_DEVICE(dip)))
2134 		}
2135 		cv_signal(&found->pr_cv);
2136 		break;
2137 
2138 	default:
2139 		panic("pm_proceed unknown cmd %d", cmd);
2140 	}
2141 	mutex_exit(&pm_rsvp_lock);
2142 }
2143 
2144 /*
2145  * This routine dispatches new work to the dependency thread. Caller must
2146  * be prepared to block for memory if necessary.
2147  */
2148 void
2149 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2150     int *res, int cached_pwr)
2151 {
2152 	pm_dep_wk_t	*new_work;
2153 
2154 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2155 	new_work->pdw_type = cmd;
2156 	new_work->pdw_wait = wait;
2157 	new_work->pdw_done = 0;
2158 	new_work->pdw_ret = 0;
2159 	new_work->pdw_pwr = cached_pwr;
2160 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2161 	if (keeper != NULL) {
2162 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2163 		    KM_SLEEP);
2164 		(void) strcpy(new_work->pdw_keeper, keeper);
2165 	}
2166 	if (kept != NULL) {
2167 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2168 		(void) strcpy(new_work->pdw_kept, kept);
2169 	}
2170 	mutex_enter(&pm_dep_thread_lock);
2171 	if (pm_dep_thread_workq == NULL) {
2172 		pm_dep_thread_workq = new_work;
2173 		pm_dep_thread_tail = new_work;
2174 		new_work->pdw_next = NULL;
2175 	} else {
2176 		pm_dep_thread_tail->pdw_next = new_work;
2177 		pm_dep_thread_tail = new_work;
2178 		new_work->pdw_next = NULL;
2179 	}
2180 	cv_signal(&pm_dep_thread_cv);
2181 	/* If caller asked for it, wait till it is done. */
2182 	if (wait)  {
2183 		while (!new_work->pdw_done)
2184 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2185 		/*
2186 		 * Pass return status, if any, back.
2187 		 */
2188 		if (res != NULL)
2189 			*res = new_work->pdw_ret;
2190 		/*
2191 		 * If we asked to wait, it is our job to free the request
2192 		 * structure.
2193 		 */
2194 		if (new_work->pdw_keeper)
2195 			kmem_free(new_work->pdw_keeper,
2196 			    strlen(new_work->pdw_keeper) + 1);
2197 		if (new_work->pdw_kept)
2198 			kmem_free(new_work->pdw_kept,
2199 			    strlen(new_work->pdw_kept) + 1);
2200 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2201 	}
2202 	mutex_exit(&pm_dep_thread_lock);
2203 }
2204 
2205 /*
2206  * Release the pm resource for this device.
2207  */
2208 void
2209 pm_rem_info(dev_info_t *dip)
2210 {
2211 	PMD_FUNC(pmf, "rem_info")
2212 	int		i, count = 0;
2213 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2214 	dev_info_t	*pdip = ddi_get_parent(dip);
2215 	char		*pathbuf;
2216 	int		work_type = PM_DEP_WK_DETACH;
2217 
2218 	ASSERT(info);
2219 
2220 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2221 	if (PM_ISDIRECT(dip)) {
2222 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2223 		ASSERT(info->pmi_clone);
2224 		info->pmi_clone = 0;
2225 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2226 	}
2227 	ASSERT(!PM_GET_PM_SCAN(dip));
2228 
2229 	/*
2230 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2231 	 * Others we check all components.  BC node that has already
2232 	 * called pm_destroy_components() has zero component count.
2233 	 * Parents that get notification are not adjusted because their
2234 	 * kidsupcnt is always 0 (or 1 during configuration).
2235 	 */
2236 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2237 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2238 
2239 	/* node is detached, so we can examine power without locking */
2240 	if (PM_ISBC(dip)) {
2241 		count = (PM_CURPOWER(dip, 0) != 0);
2242 	} else {
2243 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2244 			count += (PM_CURPOWER(dip, i) != 0);
2245 	}
2246 
2247 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2248 		e_pm_hold_rele_power(pdip, -count);
2249 
2250 	/* Schedule a request to clean up dependency records */
2251 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2252 	(void) ddi_pathname(dip, pathbuf);
2253 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2254 	    PM_DEP_NOWAIT, NULL, (count > 0));
2255 	kmem_free(pathbuf, MAXPATHLEN);
2256 
2257 	/*
2258 	 * Adjust the pm_comps_notlowest count since this device is
2259 	 * not being power-managed anymore.
2260 	 */
2261 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2262 		if (PM_CURPOWER(dip, i) != 0)
2263 			PM_DECR_NOTLOWEST(dip);
2264 	}
2265 	/*
2266 	 * Once we clear the info pointer, it looks like it is not power
2267 	 * managed to everybody else.
2268 	 */
2269 	pm_set_pm_info(dip, NULL);
2270 	kmem_free(info, sizeof (pm_info_t));
2271 }
2272 
2273 int
2274 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2275 {
2276 	int components = PM_NUMCMPTS(dip);
2277 	int *bufp;
2278 	size_t size;
2279 	int i;
2280 
2281 	if (components <= 0) {
2282 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2283 		    "can't get normal power values\n", PM_DEVICE(dip));
2284 		return (DDI_FAILURE);
2285 	} else {
2286 		size = components * sizeof (int);
2287 		bufp = kmem_alloc(size, KM_SLEEP);
2288 		for (i = 0; i < components; i++) {
2289 			bufp[i] = pm_get_normal_power(dip, i);
2290 		}
2291 	}
2292 	*length = size;
2293 	*valuep = bufp;
2294 	return (DDI_SUCCESS);
2295 }
2296 
2297 static int
2298 pm_reset_timestamps(dev_info_t *dip, void *arg)
2299 {
2300 	_NOTE(ARGUNUSED(arg))
2301 
2302 	int components;
2303 	int	i;
2304 
2305 	if (!PM_GET_PM_INFO(dip))
2306 		return (DDI_WALK_CONTINUE);
2307 	components = PM_NUMCMPTS(dip);
2308 	ASSERT(components > 0);
2309 	PM_LOCK_BUSY(dip);
2310 	for (i = 0; i < components; i++) {
2311 		struct pm_component *cp;
2312 		/*
2313 		 * If the component was not marked as busy,
2314 		 * reset its timestamp to now.
2315 		 */
2316 		cp = PM_CP(dip, i);
2317 		if (cp->pmc_timestamp)
2318 			cp->pmc_timestamp = gethrestime_sec();
2319 	}
2320 	PM_UNLOCK_BUSY(dip);
2321 	return (DDI_WALK_CONTINUE);
2322 }
2323 
2324 /*
2325  * Convert a power level to an index into the levels array (or
2326  * just PM_LEVEL_UNKNOWN in that special case).
2327  */
2328 static int
2329 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2330 {
2331 	PMD_FUNC(pmf, "level_to_index")
2332 	int i;
2333 	int limit = cp->pmc_comp.pmc_numlevels;
2334 	int *ip = cp->pmc_comp.pmc_lvals;
2335 
2336 	if (level == PM_LEVEL_UNKNOWN)
2337 		return (level);
2338 
2339 	for (i = 0; i < limit; i++) {
2340 		if (level == *ip++) {
2341 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2342 			    pmf, PM_DEVICE(dip),
2343 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2344 			return (i);
2345 		}
2346 	}
2347 	panic("pm_level_to_index: level %d not found for device "
2348 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2349 	/*NOTREACHED*/
2350 }
2351 
2352 /*
2353  * Internal function to set current power level
2354  */
2355 static void
2356 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2357 {
2358 	PMD_FUNC(pmf, "set_cur_pwr")
2359 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2360 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2361 
2362 	/*
2363 	 * Nothing to adjust if current & new levels are the same.
2364 	 */
2365 	if (curpwr != PM_LEVEL_UNKNOWN &&
2366 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2367 		return;
2368 
2369 	/*
2370 	 * Keep the count for comps doing transition to/from lowest
2371 	 * level.
2372 	 */
2373 	if (curpwr == 0) {
2374 		PM_INCR_NOTLOWEST(dip);
2375 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2376 		PM_DECR_NOTLOWEST(dip);
2377 	}
2378 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2379 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2380 }
2381 
2382 /*
2383  * This is the default method of setting the power of a device if no ppm
2384  * driver has claimed it.
2385  */
2386 int
2387 pm_power(dev_info_t *dip, int comp, int level)
2388 {
2389 	PMD_FUNC(pmf, "power")
2390 	struct dev_ops	*ops;
2391 	int		(*fn)(dev_info_t *, int, int);
2392 	struct pm_component *cp = PM_CP(dip, comp);
2393 	int retval;
2394 	pm_info_t *info = PM_GET_PM_INFO(dip);
2395 	static int pm_phc_impl(dev_info_t *, int, int, int);
2396 
2397 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2398 	    PM_DEVICE(dip), comp, level))
2399 	if (!(ops = ddi_get_driver(dip))) {
2400 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2401 		    PM_DEVICE(dip)))
2402 		return (DDI_FAILURE);
2403 	}
2404 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2405 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2406 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2407 		    (!fn ? " devo_power NULL" : "")))
2408 		return (DDI_FAILURE);
2409 	}
2410 	cp->pmc_flags |= PM_POWER_OP;
2411 	retval = (*fn)(dip, comp, level);
2412 	cp->pmc_flags &= ~PM_POWER_OP;
2413 	if (retval == DDI_SUCCESS) {
2414 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2415 		return (DDI_SUCCESS);
2416 	}
2417 
2418 	/*
2419 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2420 	 * updated only the power level of the component.  If our attempt to
2421 	 * set the device new to a power level above has failed we sync the
2422 	 * total power state via phc code now.
2423 	 */
2424 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2425 		int phc_lvl =
2426 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2427 
2428 		ASSERT(info);
2429 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2430 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2431 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2432 	}
2433 
2434 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2435 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2436 	    level, power_val_to_string(cp, level)));
2437 	return (DDI_FAILURE);
2438 }
2439 
2440 int
2441 pm_unmanage(dev_info_t *dip)
2442 {
2443 	PMD_FUNC(pmf, "unmanage")
2444 	power_req_t power_req;
2445 	int result, retval = 0;
2446 
2447 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2448 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2449 	    PM_DEVICE(dip)))
2450 	power_req.request_type = PMR_PPM_UNMANAGE;
2451 	power_req.req.ppm_config_req.who = dip;
2452 	if (pm_ppm_claimed(dip))
2453 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2454 		    &power_req, &result);
2455 #ifdef DEBUG
2456 	else
2457 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2458 		    &power_req, &result);
2459 #endif
2460 	ASSERT(retval == DDI_SUCCESS);
2461 	pm_rem_info(dip);
2462 	return (retval);
2463 }
2464 
2465 int
2466 pm_raise_power(dev_info_t *dip, int comp, int level)
2467 {
2468 	if (level < 0)
2469 		return (DDI_FAILURE);
2470 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2471 	    !e_pm_valid_power(dip, comp, level))
2472 		return (DDI_FAILURE);
2473 
2474 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2475 }
2476 
2477 int
2478 pm_lower_power(dev_info_t *dip, int comp, int level)
2479 {
2480 	PMD_FUNC(pmf, "pm_lower_power")
2481 
2482 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2483 	    !e_pm_valid_power(dip, comp, level)) {
2484 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2485 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2486 		return (DDI_FAILURE);
2487 	}
2488 
2489 	if (!DEVI_IS_DETACHING(dip)) {
2490 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2491 		    pmf, PM_DEVICE(dip)))
2492 		return (DDI_FAILURE);
2493 	}
2494 
2495 	/*
2496 	 * If we don't care about saving power, or we're treating this node
2497 	 * specially, then this is a no-op
2498 	 */
2499 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2500 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2501 		    pmf, PM_DEVICE(dip),
2502 		    !autopm_enabled ? "!autopm_enabled " : "",
2503 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2504 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2505 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2506 		return (DDI_SUCCESS);
2507 	}
2508 
2509 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2510 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2511 		    PM_DEVICE(dip)))
2512 		return (DDI_FAILURE);
2513 	}
2514 	return (DDI_SUCCESS);
2515 }
2516 
2517 /*
2518  * Find the entries struct for a given dip in the blocked list, return it locked
2519  */
2520 static psce_t *
2521 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2522 {
2523 	pscc_t *p;
2524 	psce_t *psce;
2525 
2526 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2527 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2528 		if (p->pscc_dip == dip) {
2529 			*psccp = p;
2530 			psce = p->pscc_entries;
2531 			mutex_enter(&psce->psce_lock);
2532 			ASSERT(psce);
2533 			rw_exit(&pm_pscc_direct_rwlock);
2534 			return (psce);
2535 		}
2536 	}
2537 	rw_exit(&pm_pscc_direct_rwlock);
2538 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2539 	/*NOTREACHED*/
2540 }
2541 
2542 /*
2543  * Write an entry indicating a power level change (to be passed to a process
2544  * later) in the given psce.
2545  * If we were called in the path that brings up the console fb in the
2546  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2547  * we create a record that has a size of -1, a physaddr of NULL, and that
2548  * has the overflow flag set.
2549  */
2550 static int
2551 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2552     int old, int which, pm_canblock_t canblock)
2553 {
2554 	char	buf[MAXNAMELEN];
2555 	pm_state_change_t *p;
2556 	size_t	size;
2557 	caddr_t physpath = NULL;
2558 	int	overrun = 0;
2559 
2560 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2561 	(void) ddi_pathname(dip, buf);
2562 	size = strlen(buf) + 1;
2563 	p = psce->psce_in;
2564 	if (canblock == PM_CANBLOCK_BYPASS) {
2565 		physpath = kmem_alloc(size, KM_NOSLEEP);
2566 		if (physpath == NULL) {
2567 			/*
2568 			 * mark current entry as overrun
2569 			 */
2570 			p->flags |= PSC_EVENT_LOST;
2571 			size = (size_t)-1;
2572 		}
2573 	} else
2574 		physpath = kmem_alloc(size, KM_SLEEP);
2575 	if (p->size) {	/* overflow; mark the next entry */
2576 		if (p->size != (size_t)-1)
2577 			kmem_free(p->physpath, p->size);
2578 		ASSERT(psce->psce_out == p);
2579 		if (p == psce->psce_last) {
2580 			psce->psce_first->flags |= PSC_EVENT_LOST;
2581 			psce->psce_out = psce->psce_first;
2582 		} else {
2583 			(p + 1)->flags |= PSC_EVENT_LOST;
2584 			psce->psce_out = (p + 1);
2585 		}
2586 		overrun++;
2587 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2588 		p->flags |= PSC_EVENT_LOST;
2589 		p->size = 0;
2590 		p->physpath = NULL;
2591 	}
2592 	if (which == PSC_INTEREST) {
2593 		mutex_enter(&pm_compcnt_lock);
2594 		if (pm_comps_notlowest == 0)
2595 			p->flags |= PSC_ALL_LOWEST;
2596 		else
2597 			p->flags &= ~PSC_ALL_LOWEST;
2598 		mutex_exit(&pm_compcnt_lock);
2599 	}
2600 	p->event = event;
2601 	p->timestamp = gethrestime_sec();
2602 	p->component = comp;
2603 	p->old_level = old;
2604 	p->new_level = new;
2605 	p->physpath = physpath;
2606 	p->size = size;
2607 	if (physpath != NULL)
2608 		(void) strcpy(p->physpath, buf);
2609 	if (p == psce->psce_last)
2610 		psce->psce_in = psce->psce_first;
2611 	else
2612 		psce->psce_in = ++p;
2613 	mutex_exit(&psce->psce_lock);
2614 	return (overrun);
2615 }
2616 
2617 /*
2618  * Find the next entry on the interest list.  We keep a pointer to the item we
2619  * last returned in the user's cooke.  Returns a locked entries struct.
2620  */
2621 static psce_t *
2622 psc_interest(void **cookie, pscc_t **psccp)
2623 {
2624 	pscc_t *pscc;
2625 	pscc_t **cookiep = (pscc_t **)cookie;
2626 
2627 	if (*cookiep == NULL)
2628 		pscc = pm_pscc_interest;
2629 	else
2630 		pscc = (*cookiep)->pscc_next;
2631 	if (pscc) {
2632 		*cookiep = pscc;
2633 		*psccp = pscc;
2634 		mutex_enter(&pscc->pscc_entries->psce_lock);
2635 		return (pscc->pscc_entries);
2636 	} else {
2637 		return (NULL);
2638 	}
2639 }
2640 
2641 /*
2642  * Create an entry for a process to pick up indicating a power level change.
2643  */
2644 static void
2645 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2646     int newlevel, int oldlevel, pm_canblock_t canblock)
2647 {
2648 	PMD_FUNC(pmf, "enqueue_notify")
2649 	pscc_t	*pscc;
2650 	psce_t	*psce;
2651 	void		*cookie = NULL;
2652 	int	overrun;
2653 
2654 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2655 	switch (cmd) {
2656 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2657 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2658 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2659 		psce = pm_psc_dip_to_direct(dip, &pscc);
2660 		ASSERT(psce);
2661 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2662 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2663 		    pm_poll_cnt[pscc->pscc_clone]))
2664 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2665 		    PSC_DIRECT, canblock);
2666 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2667 		mutex_enter(&pm_clone_lock);
2668 		if (!overrun)
2669 			pm_poll_cnt[pscc->pscc_clone]++;
2670 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2671 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2672 		mutex_exit(&pm_clone_lock);
2673 		break;
2674 	case PSC_HAS_CHANGED:
2675 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2676 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2677 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2678 			psce = pm_psc_dip_to_direct(dip, &pscc);
2679 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2680 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2681 			    pm_poll_cnt[pscc->pscc_clone]))
2682 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2683 			    oldlevel, PSC_DIRECT, canblock);
2684 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2685 			mutex_enter(&pm_clone_lock);
2686 			if (!overrun)
2687 				pm_poll_cnt[pscc->pscc_clone]++;
2688 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2689 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2690 			mutex_exit(&pm_clone_lock);
2691 		}
2692 		mutex_enter(&pm_clone_lock);
2693 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2694 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2695 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2696 			    oldlevel, PSC_INTEREST, canblock);
2697 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2698 		}
2699 		rw_exit(&pm_pscc_interest_rwlock);
2700 		mutex_exit(&pm_clone_lock);
2701 		break;
2702 #ifdef DEBUG
2703 	default:
2704 		ASSERT(0);
2705 #endif
2706 	}
2707 }
2708 
2709 static void
2710 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2711 {
2712 	if (listp) {
2713 		pm_ppm_devlist_t *p, *next = NULL;
2714 
2715 		for (p = *listp; p; p = next) {
2716 			next = p->ppd_next;
2717 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2718 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2719 			    canblock);
2720 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2721 		}
2722 		*listp = NULL;
2723 	}
2724 }
2725 
2726 /*
2727  * Try to get the power locks of the parent node and target (child)
2728  * node.  Return true if successful (with both locks held) or false
2729  * (with no locks held).
2730  */
2731 static int
2732 pm_try_parent_child_locks(dev_info_t *pdip,
2733     dev_info_t *dip, int *pcircp, int *circp)
2734 {
2735 	if (ndi_devi_tryenter(pdip, pcircp))
2736 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2737 			return (1);
2738 		} else {
2739 			ndi_devi_exit(pdip, *pcircp);
2740 		}
2741 	return (0);
2742 }
2743 
2744 /*
2745  * Determine if the power lock owner is blocked by current thread.
2746  * returns :
2747  * 	1 - If the thread owning the effective power lock (the first lock on
2748  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2749  *          a mutex held by the current thread.
2750  *
2751  *	0 - otherwise
2752  *
2753  * Note : This function is called by pm_power_has_changed to determine whether
2754  * it is executing in parallel with pm_set_power.
2755  */
2756 static int
2757 pm_blocked_by_us(dev_info_t *dip)
2758 {
2759 	power_req_t power_req;
2760 	kthread_t *owner;
2761 	int result;
2762 	kmutex_t *mp;
2763 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2764 
2765 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2766 	power_req.req.ppm_power_lock_owner_req.who = dip;
2767 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2768 	    DDI_SUCCESS) {
2769 		/*
2770 		 * It is assumed that if the device is claimed by ppm, ppm
2771 		 * will always implement this request type and it'll always
2772 		 * return success. We panic here, if it fails.
2773 		 */
2774 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2775 		    PM_DEVICE(dip));
2776 		/*NOTREACHED*/
2777 	}
2778 
2779 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2780 	    owner->t_state == TS_SLEEP &&
2781 	    owner->t_sobj_ops &&
2782 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2783 	    (mp = (kmutex_t *)owner->t_wchan) &&
2784 	    mutex_owner(mp) == curthread)
2785 		return (1);
2786 
2787 	return (0);
2788 }
2789 
2790 /*
2791  * Notify parent which wants to hear about a child's power changes.
2792  */
2793 static void
2794 pm_notify_parent(dev_info_t *dip,
2795     dev_info_t *pdip, int comp, int old_level, int level)
2796 {
2797 	pm_bp_has_changed_t bphc;
2798 	pm_sp_misc_t pspm;
2799 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2800 	int result = DDI_SUCCESS;
2801 
2802 	bphc.bphc_dip = dip;
2803 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2804 	bphc.bphc_comp = comp;
2805 	bphc.bphc_olevel = old_level;
2806 	bphc.bphc_nlevel = level;
2807 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2808 	pspm.pspm_scan = 0;
2809 	bphc.bphc_private = &pspm;
2810 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2811 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2812 	kmem_free(pathbuf, MAXPATHLEN);
2813 }
2814 
2815 /*
2816  * Check if we need to resume a BC device, and make the attach call as required.
2817  */
2818 static int
2819 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2820 {
2821 	int ret = DDI_SUCCESS;
2822 
2823 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2824 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2825 		/* ppm is not interested in DDI_PM_RESUME */
2826 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2827 			/* XXX Should we mark it resumed, */
2828 			/* even though it failed? */
2829 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2830 			    PM_NAME(dip), PM_ADDR(dip));
2831 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2832 	}
2833 
2834 	return (ret);
2835 }
2836 
2837 /*
2838  * Tests outside the lock to see if we should bother to enqueue an entry
2839  * for any watching process.  If yes, then caller will take the lock and
2840  * do the full protocol
2841  */
2842 static int
2843 pm_watchers()
2844 {
2845 	if (pm_processes_stopped)
2846 		return (0);
2847 	return (pm_pscc_direct || pm_pscc_interest);
2848 }
2849 
/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 *	dip	device whose component changed
 *	comp	component number
 *	level	new power level; must not be negative
 *
 * Returns DDI_FAILURE if level is negative or if any of e_pm_valid_info(),
 * e_pm_valid_comp() or e_pm_valid_power() rejects the arguments.  Returns
 * DDI_SUCCESS from the deadlock path (where only the component's level is
 * recorded and notifications are sent).  In every other case the value
 * returned by pm_phc_impl() is passed back.
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;
	static int pm_phc_impl(dev_info_t *, int, int, int);

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	/* Retry (one tick apart) until both parent and child locks are held */
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			/*
			 * Deadlock on this very component: notify watchers
			 * and the parent, resume if needed, then record the
			 * new level without touching the total power state.
			 */
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			if (blocked) {	/* blocked, but different cmpt? */
				/*
				 * Only the parent lock is attempted here; if
				 * even that fails the state is updated
				 * without adjusting the parent's hold count.
				 */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
					    comp, level, 1));
				}
				old_level = cur_power(cp);
				/* hold the parent before the child powers on */
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				/* release the parent once the child is off */
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	old_level = cur_power(cp);
	/* hold the parent before the child powers on */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	/* release the parent once the child has gone to level 0 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
2967 
2968 /*
2969  * Account for power changes to a component of the the console frame buffer.
2970  * If lowering power from full (or "unkown", which is treatd as full)
2971  * we will increment the "components off" count of the fb device.
2972  * Subsequent lowering of the same component doesn't affect the count.  If
2973  * raising a component back to full power, we will decrement the count.
2974  *
2975  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2976  */
2977 static int
2978 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2979 {
2980 	struct pm_component *cp = PM_CP(dip, cmpt);
2981 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2982 	int want_normal = (new == cp->pmc_norm_pwr);
2983 	int incr = 0;
2984 
2985 	if (on && !want_normal)
2986 		incr = 1;
2987 	else if (!on && want_normal)
2988 		incr = -1;
2989 	return (incr);
2990 }
2991 
2992 /*
2993  * Adjust the count of console frame buffer components < full power.
2994  */
2995 static void
2996 update_comps_off(int incr, dev_info_t *dip)
2997 {
2998 		mutex_enter(&pm_cfb_lock);
2999 		pm_cfb_comps_off += incr;
3000 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3001 		mutex_exit(&pm_cfb_lock);
3002 }
3003 
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 *
 *	dip	device whose component changed
 *	comp	component number
 *	level	new power level
 *	notify	nonzero to notify the parent (if it wants notification),
 *		resume a BC device if required, and queue a PSC_HAS_CHANGED
 *		event for watching processes
 *
 * Returns DDI_SUCCESS if the component is already at 'level' or once the
 * state has been updated, and DDI_FAILURE if the ppm rejects the
 * PMR_PPM_POWER_CHANGE_NOTIFY request (in which case nothing is updated).
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	cp = PM_CP(dip, comp);
	/*
	 * If a power change was reported while a set-power operation was
	 * blocked, the stashed pmc_phc_pwr index is the old level to use.
	 */
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	/* convert the index into a level, leaving "unknown" as is */
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Keep the console frame buffer "components off" count in step */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	if (POWERING_OFF(old_level, level)) {
		dodeps = 1;
		/* only when every component now reports a zero level */
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * dont increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/* Hand the device's path to the dependency thread without waiting */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3127 
3128 /*
3129  * This function is called at startup time to notify pm of the existence
3130  * of any platform power managers for this platform.  As a result of
3131  * this registration, each function provided will be called each time
3132  * a device node is attached, until one returns true, and it must claim the
3133  * device node (by returning non-zero) if it wants to be involved in the
3134  * node's power management.  If it does claim the node, then it will
3135  * subsequently be notified of attach and detach events.
3136  *
3137  */
3138 
3139 int
3140 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3141 {
3142 	PMD_FUNC(pmf, "register_ppm")
3143 	struct ppm_callbacks *ppmcp;
3144 	pm_component_t *cp;
3145 	int i, pwr, result, circ;
3146 	power_req_t power_req;
3147 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3148 	void pm_ppm_claim(dev_info_t *);
3149 
3150 	mutex_enter(&ppm_lock);
3151 	ppmcp = ppm_callbacks;
3152 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3153 		if (ppmcp->ppmc_func == NULL) {
3154 			ppmcp->ppmc_func = func;
3155 			ppmcp->ppmc_dip = dip;
3156 			break;
3157 		}
3158 	}
3159 	mutex_exit(&ppm_lock);
3160 
3161 	if (i >= MAX_PPM_HANDLERS)
3162 		return (DDI_FAILURE);
3163 	while ((dip = ddi_get_parent(dip)) != NULL) {
3164 		if (PM_GET_PM_INFO(dip) == NULL)
3165 			continue;
3166 		pm_ppm_claim(dip);
3167 		if (pm_ppm_claimed(dip)) {
3168 			/*
3169 			 * Tell ppm about this.
3170 			 */
3171 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3172 			p->old_level = PM_LEVEL_UNKNOWN;
3173 			p->who = dip;
3174 			PM_LOCK_POWER(dip, &circ);
3175 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3176 				cp = PM_CP(dip, i);
3177 				pwr = cp->pmc_cur_pwr;
3178 				if (pwr != PM_LEVEL_UNKNOWN) {
3179 					p->cmpt = i;
3180 					p->new_level = cur_power(cp);
3181 					p->old_level = PM_LEVEL_UNKNOWN;
3182 					if (pm_ctlops(PPM(dip), dip,
3183 					    DDI_CTLOPS_POWER, &power_req,
3184 					    &result) == DDI_FAILURE) {
3185 						PMD(PMD_FAIL, ("%s: pc "
3186 						    "%s@%s(%s#%d) to %d "
3187 						    "fails\n", pmf,
3188 						    PM_DEVICE(dip), pwr))
3189 					}
3190 				}
3191 			}
3192 			PM_UNLOCK_POWER(dip, circ);
3193 		}
3194 	}
3195 	return (DDI_SUCCESS);
3196 }
3197 
3198 /*
3199  * Call the ppm's that have registered and adjust the devinfo struct as
3200  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3201  * by each ppm are assumed to be disjoint.
3202  */
3203 void
3204 pm_ppm_claim(dev_info_t *dip)
3205 {
3206 	struct ppm_callbacks *ppmcp;
3207 
3208 	if (PPM(dip)) {
3209 		return;
3210 	}
3211 	mutex_enter(&ppm_lock);
3212 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3213 		if ((*ppmcp->ppmc_func)(dip)) {
3214 			DEVI(dip)->devi_pm_ppm =
3215 			    (struct dev_info *)ppmcp->ppmc_dip;
3216 			mutex_exit(&ppm_lock);
3217 			return;
3218 		}
3219 	}
3220 	mutex_exit(&ppm_lock);
3221 }
3222 
3223 /*
3224  * Node is being detached so stop autopm until we see if it succeeds, in which
3225  * case pm_stop will be called.  For backwards compatible devices we bring the
3226  * device up to full power on the assumption the detach will succeed.
3227  */
3228 void
3229 pm_detaching(dev_info_t *dip)
3230 {
3231 	PMD_FUNC(pmf, "detaching")
3232 	pm_info_t *info = PM_GET_PM_INFO(dip);
3233 	int iscons;
3234 
3235 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3236 	    PM_NUMCMPTS(dip)))
3237 	if (info == NULL)
3238 		return;
3239 	ASSERT(DEVI_IS_DETACHING(dip));
3240 	PM_LOCK_DIP(dip);
3241 	info->pmi_dev_pm_state |= PM_DETACHING;
3242 	PM_UNLOCK_DIP(dip);
3243 	if (!PM_ISBC(dip))
3244 		pm_scan_stop(dip);
3245 
3246 	/*
3247 	 * console and old-style devices get brought up when detaching.
3248 	 */
3249 	iscons = PM_IS_CFB(dip);
3250 	if (iscons || PM_ISBC(dip)) {
3251 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3252 		if (iscons) {
3253 			mutex_enter(&pm_cfb_lock);
3254 			while (cfb_inuse) {
3255 				mutex_exit(&pm_cfb_lock);
3256 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3257 				delay(1);
3258 				mutex_enter(&pm_cfb_lock);
3259 			}
3260 			ASSERT(cfb_dip_detaching == NULL);
3261 			ASSERT(cfb_dip);
3262 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3263 			cfb_dip = NULL;
3264 			mutex_exit(&pm_cfb_lock);
3265 		}
3266 	}
3267 }
3268 
3269 /*
3270  * Node failed to detach.  If it used to be autopm'd, make it so again.
3271  */
3272 void
3273 pm_detach_failed(dev_info_t *dip)
3274 {
3275 	PMD_FUNC(pmf, "detach_failed")
3276 	pm_info_t *info = PM_GET_PM_INFO(dip);
3277 	int pm_all_at_normal(dev_info_t *);
3278 
3279 	if (info == NULL)
3280 		return;
3281 	ASSERT(DEVI_IS_DETACHING(dip));
3282 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3283 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3284 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3285 			/* Make sure the operation is still needed */
3286 			if (!pm_all_at_normal(dip)) {
3287 				if (pm_all_to_normal(dip,
3288 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3289 					PMD(PMD_ERROR, ("%s: could not bring "
3290 					    "%s@%s(%s#%d) to normal\n", pmf,
3291 					    PM_DEVICE(dip)))
3292 				}
3293 			}
3294 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3295 		}
3296 	}
3297 	if (!PM_ISBC(dip)) {
3298 		mutex_enter(&pm_scan_lock);
3299 		if (PM_SCANABLE(dip))
3300 			pm_scan_init(dip);
3301 		mutex_exit(&pm_scan_lock);
3302 		pm_rescan(dip);
3303 	}
3304 }
3305 
/*
 * Generic Backwards Compatible component.  bc_comp is the template that
 * e_pm_default_components() copies (by struct assignment) into every
 * component of a BC device; it describes a component named "unknown" with
 * 2 levels.  bc_names ("off", "on") is the string table that bc_comp's
 * initializer points at.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3310 
3311 static void
3312 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3313 {
3314 	pm_comp_t *pmc;
3315 	pmc = &cp->pmc_comp;
3316 	pmc->pmc_numlevels = 2;
3317 	pmc->pmc_lvals[0] = 0;
3318 	pmc->pmc_lvals[1] = norm;
3319 	e_pm_set_cur_pwr(dip, cp, norm);
3320 }
3321 
3322 static void
3323 e_pm_default_components(dev_info_t *dip, int cmpts)
3324 {
3325 	int i;
3326 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3327 
3328 	p = DEVI(dip)->devi_pm_components;
3329 	for (i = 0; i < cmpts; i++, p++) {
3330 		p->pmc_comp = bc_comp;	/* struct assignment */
3331 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3332 		    KM_SLEEP);
3333 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3334 		    KM_SLEEP);
3335 		p->pmc_comp.pmc_numlevels = 2;
3336 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3337 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3338 	}
3339 }
3340 
3341 /*
3342  * Called from functions that require components to exist already to allow
3343  * for their creation by parsing the pm-components property.
3344  * Device will not be power managed as a result of this call
3345  * No locking needed because we're single threaded by the ndi_devi_enter
3346  * done while attaching, and the device isn't visible until after it has
3347  * attached
3348  */
3349 int
3350 pm_premanage(dev_info_t *dip, int style)
3351 {
3352 	PMD_FUNC(pmf, "premanage")
3353 	pm_comp_t	*pcp, *compp;
3354 	int		cmpts, i, norm, error;
3355 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3356 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3357 
3358 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3359 	/*
3360 	 * If this dip has already been processed, don't mess with it
3361 	 */
3362 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3363 		return (DDI_SUCCESS);
3364 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3365 		return (DDI_FAILURE);
3366 	}
3367 	/*
3368 	 * Look up pm-components property and create components accordingly
3369 	 * If that fails, fall back to backwards compatibility
3370 	 */
3371 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3372 		/*
3373 		 * If error is set, the property existed but was not well formed
3374 		 */
3375 		if (error || (style == PM_STYLE_NEW)) {
3376 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3377 			return (DDI_FAILURE);
3378 		}
3379 		/*
3380 		 * If they don't have the pm-components property, then we
3381 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3382 		 * behavior driver must have called pm_create_components, and
3383 		 * we need to flesh out dummy components
3384 		 */
3385 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3386 			/*
3387 			 * Not really failure, but we don't want the
3388 			 * caller to treat it as success
3389 			 */
3390 			return (DDI_FAILURE);
3391 		}
3392 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3393 		e_pm_default_components(dip, cmpts);
3394 		for (i = 0; i < cmpts; i++) {
3395 			/*
3396 			 * if normal power not set yet, we don't really know
3397 			 * what *ANY* of the power values are.  If normal
3398 			 * power is set, then we assume for this backwards
3399 			 * compatible case that the values are 0, normal power.
3400 			 */
3401 			norm = pm_get_normal_power(dip, i);
3402 			if (norm == (uint_t)-1) {
3403 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3404 				    PM_DEVICE(dip), i))
3405 				return (DDI_FAILURE);
3406 			}
3407 			/*
3408 			 * Components of BC devices start at their normal power,
3409 			 * so count them to be not at their lowest power.
3410 			 */
3411 			PM_INCR_NOTLOWEST(dip);
3412 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3413 		}
3414 	} else {
3415 		/*
3416 		 * e_pm_create_components was called from pm_autoconfig(), it
3417 		 * creates components with no descriptions (or known levels)
3418 		 */
3419 		cmpts = PM_NUMCMPTS(dip);
3420 		ASSERT(cmpts != 0);
3421 		pcp = compp;
3422 		p = DEVI(dip)->devi_pm_components;
3423 		for (i = 0; i < cmpts; i++, p++) {
3424 			p->pmc_comp = *pcp++;   /* struct assignment */
3425 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3426 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3427 		}
3428 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3429 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3430 			    PMC_CPU_THRESH);
3431 		else
3432 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3433 			    PMC_DEF_THRESH);
3434 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3435 	}
3436 	return (DDI_SUCCESS);
3437 }
3438 
3439 /*
3440  * Called from during or after the device's attach to let us know it is ready
3441  * to play autopm.   Look up the pm model and manage the device accordingly.
3442  * Returns system call errno value.
3443  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3444  * a little cleaner
3445  *
3446  * Called with dip lock held, return with dip lock unheld.
3447  */
3448 
3449 int
3450 e_pm_manage(dev_info_t *dip, int style)
3451 {
3452 	PMD_FUNC(pmf, "e_manage")
3453 	pm_info_t	*info;
3454 	dev_info_t	*pdip = ddi_get_parent(dip);
3455 	int	pm_thresh_specd(dev_info_t *);
3456 	int	count;
3457 	char	*pathbuf;
3458 
3459 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3460 		return (DDI_FAILURE);
3461 	}
3462 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3463 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3464 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3465 
3466 	/*
3467 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3468 	 * out at their normal power, so they are "up", others start out
3469 	 * unknown, which is effectively "up".  Parent which want notification
3470 	 * get kidsupcnt of 0 always.
3471 	 */
3472 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3473 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3474 		e_pm_hold_rele_power(pdip, count);
3475 
3476 	pm_set_pm_info(dip, info);
3477 	/*
3478 	 * Apply any recorded thresholds
3479 	 */
3480 	(void) pm_thresh_specd(dip);
3481 
3482 	/*
3483 	 * Do dependency processing.
3484 	 */
3485 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3486 	(void) ddi_pathname(dip, pathbuf);
3487 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3488 	    PM_DEP_NOWAIT, NULL, 0);
3489 	kmem_free(pathbuf, MAXPATHLEN);
3490 
3491 	if (!PM_ISBC(dip)) {
3492 		mutex_enter(&pm_scan_lock);
3493 		if (PM_SCANABLE(dip)) {
3494 			pm_scan_init(dip);
3495 			mutex_exit(&pm_scan_lock);
3496 			pm_rescan(dip);
3497 		} else {
3498 			mutex_exit(&pm_scan_lock);
3499 		}
3500 	}
3501 	return (0);
3502 }
3503 
3504 /*
3505  * This is the obsolete exported interface for a driver to find out its
3506  * "normal" (max) power.
3507  * We only get components destroyed while no power management is
3508  * going on (and the device is detached), so we don't need a mutex here
3509  */
3510 int
3511 pm_get_normal_power(dev_info_t *dip, int comp)
3512 {
3513 
3514 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3515 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3516 	}
3517 	return (DDI_FAILURE);
3518 }
3519 
3520 /*
3521  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3522  */
3523 int
3524 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3525 {
3526 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3527 		*levelp = PM_CURPOWER(dip, comp);
3528 		return (DDI_SUCCESS);
3529 	}
3530 	return (DDI_FAILURE);
3531 }
3532 
3533 /*
3534  * Returns current threshold of indicated component
3535  */
3536 static int
3537 cur_threshold(dev_info_t *dip, int comp)
3538 {
3539 	pm_component_t *cp = PM_CP(dip, comp);
3540 	int pwr;
3541 
3542 	if (PM_ISBC(dip)) {
3543 		/*
3544 		 * backwards compatible nodes only have one threshold
3545 		 */
3546 		return (cp->pmc_comp.pmc_thresh[1]);
3547 	}
3548 	pwr = cp->pmc_cur_pwr;
3549 	if (pwr == PM_LEVEL_UNKNOWN) {
3550 		int thresh;
3551 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3552 			thresh = pm_default_nexus_threshold;
3553 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3554 			thresh = pm_cpu_idle_threshold;
3555 		else
3556 			thresh = pm_system_idle_threshold;
3557 		return (thresh);
3558 	}
3559 	ASSERT(cp->pmc_comp.pmc_thresh);
3560 	return (cp->pmc_comp.pmc_thresh[pwr]);
3561 }
3562 
3563 /*
3564  * Compute next lower component power level given power index.
3565  */
3566 static int
3567 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3568 {
3569 	int nxt_pwr;
3570 
3571 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3572 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3573 	} else {
3574 		pwrndx--;
3575 		ASSERT(pwrndx >= 0);
3576 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3577 	}
3578 	return (nxt_pwr);
3579 }
3580 
3581 /*
3582  * Update the maxpower (normal) power of a component. Note that the
3583  * component's power level is only changed if it's current power level
3584  * is higher than the new max power.
3585  */
3586 int
3587 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3588 {
3589 	PMD_FUNC(pmf, "update_maxpower")
3590 	int old;
3591 	int result;
3592 
3593 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3594 	    !e_pm_valid_power(dip, comp, level)) {
3595 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3596 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3597 		return (DDI_FAILURE);
3598 	}
3599 	old = e_pm_get_max_power(dip, comp);
3600 	e_pm_set_max_power(dip, comp, level);
3601 
3602 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3603 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3604 		e_pm_set_max_power(dip, comp, old);
3605 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3606 		    PM_DEVICE(dip)))
3607 		return (DDI_FAILURE);
3608 	}
3609 	return (DDI_SUCCESS);
3610 }
3611 
3612 /*
3613  * Bring all components of device to normal power
3614  */
3615 int
3616 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3617 {
3618 	PMD_FUNC(pmf, "all_to_normal")
3619 	int		*normal;
3620 	int		i, ncomps, result;
3621 	size_t		size;
3622 	int		changefailed = 0;
3623 
3624 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3625 	ASSERT(PM_GET_PM_INFO(dip));
3626 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3627 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3628 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3629 		return (DDI_FAILURE);
3630 	}
3631 	ncomps = PM_NUMCMPTS(dip);
3632 	for (i = 0; i < ncomps; i++) {
3633 		if (pm_set_power(dip, i, normal[i],
3634 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3635 			changefailed++;
3636 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3637 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3638 			    PM_DEVICE(dip), i, normal[i], result))
3639 		}
3640 	}
3641 	kmem_free(normal, size);
3642 	if (changefailed) {
3643 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3644 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3645 		return (DDI_FAILURE);
3646 	}
3647 	return (DDI_SUCCESS);
3648 }
3649 
3650 /*
3651  * Returns true if all components of device are at normal power
3652  */
3653 int
3654 pm_all_at_normal(dev_info_t *dip)
3655 {
3656 	PMD_FUNC(pmf, "all_at_normal")
3657 	int		*normal;
3658 	int		i;
3659 	size_t		size;
3660 
3661 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3662 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3663 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3664 		return (DDI_FAILURE);
3665 	}
3666 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3667 		int current = PM_CURPOWER(dip, i);
3668 		if (normal[i] > current) {
3669 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3670 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3671 			    normal[i], current))
3672 			break;
3673 		}
3674 	}
3675 	kmem_free(normal, size);
3676 	if (i != PM_NUMCMPTS(dip)) {
3677 		return (0);
3678 	}
3679 	return (1);
3680 }
3681 
3682 static void
3683 bring_wekeeps_up(char *keeper)
3684 {
3685 	PMD_FUNC(pmf, "bring_wekeeps_up")
3686 	int i;
3687 	pm_pdr_t *dp;
3688 	pm_info_t *wku_info;
3689 	char *kept_path;
3690 	dev_info_t *kept;
3691 	static void bring_pmdep_up(dev_info_t *, int);
3692 
3693 	if (panicstr) {
3694 		return;
3695 	}
3696 	/*
3697 	 * We process the request even if the keeper detaches because
3698 	 * detach processing expects this to increment kidsupcnt of kept.
3699 	 */
3700 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3701 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3702 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3703 			continue;
3704 		for (i = 0; i < dp->pdr_kept_count; i++) {
3705 			kept_path = dp->pdr_kept_paths[i];
3706 			if (kept_path == NULL)
3707 				continue;
3708 			ASSERT(kept_path[0] != '\0');
3709 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3710 				continue;
3711 			wku_info = PM_GET_PM_INFO(kept);
3712 			if (wku_info == NULL) {
3713 				if (kept)
3714 					ddi_release_devi(kept);
3715 				continue;
3716 			}
3717 			/*
3718 			 * Don't mess with it if it is being detached, it isn't
3719 			 * safe to call its power entry point
3720 			 */
3721 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3722 				if (kept)
3723 					ddi_release_devi(kept);
3724 				continue;
3725 			}
3726 			bring_pmdep_up(kept, 1);
3727 			ddi_release_devi(kept);
3728 		}
3729 	}
3730 }
3731 
3732 /*
3733  * Bring up the 'kept' device passed as argument
3734  */
3735 static void
3736 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3737 {
3738 	PMD_FUNC(pmf, "bring_pmdep_up")
3739 	int is_all_at_normal = 0;
3740 
3741 	/*
3742 	 * If the kept device has been unmanaged, do nothing.
3743 	 */
3744 	if (!PM_GET_PM_INFO(kept_dip))
3745 		return;
3746 
3747 	/* Just ignore DIRECT PM device till they are released. */
3748 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3749 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3750 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3751 		    "controlling process did something else\n", pmf,
3752 		    PM_DEVICE(kept_dip)))
3753 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3754 		return;
3755 	}
3756 	/* if we got here the keeper had a transition from OFF->ON */
3757 	if (hold)
3758 		pm_hold_power(kept_dip);
3759 
3760 	if (!is_all_at_normal)
3761 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3762 }
3763 
/*
 * A bunch of stuff that belongs only to the next routine (or two)
 */

/* "NAME=" prefix and its length without the terminating NUL */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* name of the property that describes a device's components */
static const char pmcompstr[] = "pm-components";

/* Singly linked list node carrying one component description */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};

/*
 * Local character classification.  Both macros evaluate their argument
 * more than once, so do not pass an expression with side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))

/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * Finishes the component description that compp points to.  It allocates
 * the level-name buffer ('size' bytes) and the per-level name, value and
 * threshold arrays ('level' entries each), then for each level copies the
 * name from lnames[] into the buffer (advancing by lszs[]), copies the
 * value from lvals[] and sets the threshold to INT_MAX.
 *
 * The macro expects these names to be in scope where it is expanded:
 * compp, size, level, tp, j, lvals, lnames and lszs.
 * NOTE(review): the function that uses this macro is not in this part of
 * the file -- confirm the types of those variables there.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3805 
3806 /*
3807  * Create (empty) component data structures.
3808  */
3809 static void
3810 e_pm_create_components(dev_info_t *dip, int num_components)
3811 {
3812 	struct pm_component *compp, *ocompp;
3813 	int i, size = 0;
3814 
3815 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3816 	ASSERT(!DEVI(dip)->devi_pm_components);
3817 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3818 	size = sizeof (struct pm_component) * num_components;
3819 
3820 	compp = kmem_zalloc(size, KM_SLEEP);
3821 	ocompp = compp;
3822 	DEVI(dip)->devi_pm_comp_size = size;
3823 	DEVI(dip)->devi_pm_num_components = num_components;
3824 	PM_LOCK_BUSY(dip);
3825 	for (i = 0; i < num_components;  i++) {
3826 		compp->pmc_timestamp = gethrestime_sec();
3827 		compp->pmc_norm_pwr = (uint_t)-1;
3828 		compp++;
3829 	}
3830 	PM_UNLOCK_BUSY(dip);
3831 	DEVI(dip)->devi_pm_components = ocompp;
3832 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3833 }
3834 
/*
 * Parse hex or decimal value from char string.
 *
 *	cp	start of the text to parse; must be NUL terminated
 *	valp	where the parsed value is stored on success
 *
 * The text must start with a decimal digit.  A leading "0x" or "0X"
 * introduces a run of hexadecimal digits (possibly empty, giving 0); any
 * other text is read as a run of decimal digits.
 *
 * Returns a pointer to the first character after the number, or NULL
 * (leaving *valp untouched) if the text does not start with a digit.
 *
 * The value is accumulated digit by digit, so the length of the digit run
 * is not limited by any intermediate buffer.  Accumulation is done in
 * unsigned arithmetic so that an over-long number wraps instead of
 * overflowing a signed int.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	int ch = *cp;

	if (!isdigit(ch))
		return (NULL);

	if (ch == '0' && (cp[1] == 'x' || cp[1] == 'X')) {
		/* hexadecimal: step over the "0x" prefix */
		cp += 2;
		for (ch = *cp; isxdigit(ch); ch = *++cp) {
			unsigned int digit;

			if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else
				digit = ch - '0';
			value = value * 16 + digit;
		}
	} else {
		/* decimal (a leading 0 does not mean octal) */
		for (; isdigit(ch); ch = *++cp)
			value = value * 10 + (unsigned int)(ch - '0');
	}

	*valp = (int)value;
	return (cp);
}
3897 
3898 /*
3899  * Set max (previously documented as "normal") power.
3900  */
3901 static void
3902 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3903 {
3904 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3905 }
3906 
3907 /*
3908  * Get max (previously documented as "normal") power.
3909  */
3910 static int
3911 e_pm_get_max_power(dev_info_t *dip, int component_number)
3912 {
3913 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3914 }
3915 
3916 /*
3917  * Internal routine for destroying components
3918  * It is called even when there might not be any, so it must be forgiving.
3919  */
3920 static void
3921 e_pm_destroy_components(dev_info_t *dip)
3922 {
3923 	int i;
3924 	struct pm_component *cp;
3925 
3926 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3927 	if (PM_NUMCMPTS(dip) == 0)
3928 		return;
3929 	cp = DEVI(dip)->devi_pm_components;
3930 	ASSERT(cp);
3931 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3932 		int nlevels = cp->pmc_comp.pmc_numlevels;
3933 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3934 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3935 		/*
3936 		 * For BC nodes, the rest is static in bc_comp, so skip it
3937 		 */
3938 		if (PM_ISBC(dip))
3939 			continue;
3940 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3941 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3942 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3943 		    cp->pmc_comp.pmc_lnames_sz);
3944 	}
3945 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3946 	DEVI(dip)->devi_pm_components = NULL;
3947 	DEVI(dip)->devi_pm_num_components = 0;
3948 	DEVI(dip)->devi_pm_flags &=
3949 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3950 }
3951 
3952 /*
3953  * Read the pm-components property (if there is one) and use it to set up
3954  * components.  Returns a pointer to an array of component structures if
3955  * pm-components found and successfully parsed, else returns NULL.
3956  * Sets error return *errp to true to indicate a failure (as opposed to no
3957  * property being present).
3958  */
3959 pm_comp_t *
3960 pm_autoconfig(dev_info_t *dip, int *errp)
3961 {
3962 	PMD_FUNC(pmf, "autoconfig")
3963 	uint_t nelems;
3964 	char **pp;
3965 	pm_comp_t *compp = NULL;
3966 	int i, j, level, components = 0;
3967 	size_t size = 0;
3968 	struct pm_comp_pkg *p, *ptail;
3969 	struct pm_comp_pkg *phead = NULL;
3970 	int *lvals = NULL;
3971 	int *lszs = NULL;
3972 	int *np = NULL;
3973 	int npi = 0;
3974 	char **lnames = NULL;
3975 	char *cp, *tp;
3976 	pm_comp_t *ret = NULL;
3977 
3978 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3979 	*errp = 0;	/* assume success */
3980 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3981 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3982 		return (NULL);
3983 	}
3984 
3985 	if (nelems < 3) {	/* need at least one name and two levels */
3986 		goto errout;
3987 	}
3988 
3989 	/*
3990 	 * pm_create_components is no longer allowed
3991 	 */
3992 	if (PM_NUMCMPTS(dip) != 0) {
3993 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3994 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3995 		goto errout;
3996 	}
3997 
3998 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3999 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4000 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4001 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4002 
4003 	level = 0;
4004 	phead = NULL;
4005 	for (i = 0; i < nelems; i++) {
4006 		cp = pp[i];
4007 		if (!isdigit(*cp)) {	/*  must be name */
4008 			if (strncmp(cp, namestr, nameln) != 0) {
4009 				goto errout;
4010 			}
4011 			if (i != 0) {
4012 				if (level == 0) {	/* no level spec'd */
4013 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4014 					    pmf))
4015 					goto errout;
4016 				}
4017 				np[npi++] = lvals[level - 1];
4018 				/* finish up previous component levels */
4019 				FINISH_COMP;
4020 			}
4021 			cp += nameln;
4022 			if (!*cp) {
4023 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4024 				goto errout;
4025 			}
4026 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4027 			if (phead == NULL) {
4028 				phead = ptail = p;
4029 			} else {
4030 				ptail->next = p;
4031 				ptail = p;
4032 			}
4033 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4034 			    KM_SLEEP);
4035 			compp->pmc_name_sz = strlen(cp) + 1;
4036 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4037 			    KM_SLEEP);
4038 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4039 			components++;
4040 			level = 0;
4041 		} else {	/* better be power level <num>=<name> */
4042 #ifdef DEBUG
4043 			tp = cp;
4044 #endif
4045 			if (i == 0 ||
4046 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4047 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4048 				goto errout;
4049 			}
4050 #ifdef DEBUG
4051 			tp = cp;
4052 #endif
4053 			if (*cp++ != '=' || !*cp) {
4054 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4055 				goto errout;
4056 			}
4057 
4058 			lszs[level] = strlen(cp) + 1;
4059 			size += lszs[level];
4060 			lnames[level] = cp;	/* points into prop string */
4061 			level++;
4062 		}
4063 	}
4064 	np[npi++] = lvals[level - 1];
4065 	if (level == 0) {	/* ended with a name */
4066 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4067 		goto errout;
4068 	}
4069 	FINISH_COMP;
4070 
4071 
4072 	/*
4073 	 * Now we have a list of components--we have to return instead an
4074 	 * array of them, but we can just copy the top level and leave
4075 	 * the rest as is
4076 	 */
4077 	(void) e_pm_create_components(dip, components);
4078 	for (i = 0; i < components; i++)
4079 		e_pm_set_max_power(dip, i, np[i]);
4080 
4081 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4082 	for (i = 0, p = phead; i < components; i++) {
4083 		ASSERT(p);
4084 		/*
4085 		 * Now sanity-check values:  levels must be monotonically
4086 		 * increasing
4087 		 */
4088 		if (p->comp->pmc_numlevels < 2) {
4089 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4090 			    "levels\n", pmf,
4091 			    p->comp->pmc_name, PM_DEVICE(dip),
4092 			    p->comp->pmc_numlevels))
4093 			goto errout;
4094 		}
4095 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4096 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4097 			    (p->comp->pmc_lvals[j] <=
4098 			    p->comp->pmc_lvals[j - 1]))) {
4099 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4100 				    "not mono. incr, %d follows %d\n", pmf,
4101 				    p->comp->pmc_name, PM_DEVICE(dip),
4102 				    p->comp->pmc_lvals[j],
4103 				    p->comp->pmc_lvals[j - 1]))
4104 				goto errout;
4105 			}
4106 		}
4107 		ret[i] = *p->comp;	/* struct assignment */
4108 		for (j = 0; j < i; j++) {
4109 			/*
4110 			 * Test for unique component names
4111 			 */
4112 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4113 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4114 				    "unique\n", pmf, ret[j].pmc_name,
4115 				    PM_DEVICE(dip)))
4116 				goto errout;
4117 			}
4118 		}
4119 		ptail = p;
4120 		p = p->next;
4121 		phead = p;	/* errout depends on phead making sense */
4122 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4123 		kmem_free(ptail, sizeof (*ptail));
4124 	}
4125 out:
4126 	ddi_prop_free(pp);
4127 	if (lvals)
4128 		kmem_free(lvals, nelems * sizeof (int));
4129 	if (lszs)
4130 		kmem_free(lszs, nelems * sizeof (int));
4131 	if (lnames)
4132 		kmem_free(lnames, nelems * sizeof (char *));
4133 	if (np)
4134 		kmem_free(np, nelems * sizeof (int));
4135 	return (ret);
4136 
4137 errout:
4138 	e_pm_destroy_components(dip);
4139 	*errp = 1;	/* signal failure */
4140 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4141 	for (i = 0; i < nelems - 1; i++)
4142 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4143 	if (nelems != 0)
4144 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4145 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4146 	for (p = phead; p; ) {
4147 		pm_comp_t *pp;
4148 		int n;
4149 
4150 		ptail = p;
4151 		/*
4152 		 * Free component data structures
4153 		 */
4154 		pp = p->comp;
4155 		n = pp->pmc_numlevels;
4156 		if (pp->pmc_name_sz) {
4157 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4158 		}
4159 		if (pp->pmc_lnames_sz) {
4160 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4161 		}
4162 		if (pp->pmc_lnames) {
4163 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4164 		}
4165 		if (pp->pmc_thresh) {
4166 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4167 		}
4168 		if (pp->pmc_lvals) {
4169 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4170 		}
4171 		p = ptail->next;
4172 		kmem_free(ptail, sizeof (*ptail));
4173 	}
4174 	if (ret != NULL)
4175 		kmem_free(ret, components * sizeof (pm_comp_t));
4176 	ret = NULL;
4177 	goto out;
4178 }
4179 
/*
 * Set threshold values for a devices components by dividing the target
 * threshold (base) by the number of transitions and assign each transition
 * that threshold.  This will get the entire device down in the target time if
 * all components are idle and even if there are dependencies among components.
 *
 * Devices may well get powered all the way down before the target time, but
 * at least the EPA will be happy.
 *
 * dip	device whose component thresholds are being set
 * base	device-wide threshold; 95% of it is what actually gets distributed
 * flag	threshold-type flag stored in devi_pm_flags after the other
 *	threshold-type bits have been cleared
 *
 * The dip lock is taken here and dropped before returning, so the caller
 * must not hold it.  Only thresholds for levels 1 and up are written;
 * pmc_thresh[0] is never touched by this routine.
 */
void
pm_set_device_threshold(dev_info_t *dip, int base, int flag)
{
	PMD_FUNC(pmf, "set_device_threshold")
	int target_threshold = (base * 95) / 100;
	int level, comp;		/* loop counters */
	int transitions = 0;
	int ncomp = PM_NUMCMPTS(dip);
	int thresh;
	int remainder;
	pm_comp_t *pmc;
	int i, circ;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PM_LOCK_DIP(dip);
	/*
	 * First we handle the easy one.  If we're setting the default
	 * threshold for a node with children, then we set it to the
	 * default nexus threshold (currently 0) and mark it as default
	 * nexus threshold instead
	 */
	if (PM_IS_NEXUS(dip)) {
		if (flag == PMC_DEF_THRESH) {
			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
			    PM_DEVICE(dip)))
			thresh = pm_default_nexus_threshold;
			/* every transition gets the nexus default; base unused */
			for (comp = 0; comp < ncomp; comp++) {
				pmc = &PM_CP(dip, comp)->pmc_comp;
				for (level = 1; level < pmc->pmc_numlevels;
				    level++) {
					pmc->pmc_thresh[level] = thresh;
				}
			}
			DEVI(dip)->devi_pm_dev_thresh =
			    pm_default_nexus_threshold;
			/*
			 * If the nexus node is being reconfigured back to
			 * the default threshold, adjust the notlowest count.
			 */
			if (DEVI(dip)->devi_pm_flags &
			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
				PM_LOCK_POWER(dip, &circ);
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					/* components at level 0 were not counted */
					if (PM_CURPOWER(dip, i) == 0)
						continue;
					mutex_enter(&pm_compcnt_lock);
					ASSERT(pm_comps_notlowest);
					pm_comps_notlowest--;
					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
					    "notlowest to %d\n", pmf,
					    PM_DEVICE(dip), pm_comps_notlowest))
					if (pm_comps_notlowest == 0)
						pm_ppm_notify_all_lowest(dip,
						    PM_ALL_LOWEST);
					mutex_exit(&pm_compcnt_lock);
				}
				PM_UNLOCK_POWER(dip, circ);
			}
			/* replace the threshold-type bits with NEXDEF */
			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
			PM_UNLOCK_DIP(dip);
			return;
		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
			/*
			 * If the nexus node is being configured for a
			 * non-default threshold, include that node in
			 * the notlowest accounting.
			 */
			PM_LOCK_POWER(dip, &circ);
			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
				if (PM_CURPOWER(dip, i) == 0)
					continue;
				mutex_enter(&pm_compcnt_lock);
				/* notify on the 0 -> 1 transition of the count */
				if (pm_comps_notlowest == 0)
					pm_ppm_notify_all_lowest(dip,
					    PM_NOT_ALL_LOWEST);
				pm_comps_notlowest++;
				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
				    "notlowest to %d\n", pmf,
				    PM_DEVICE(dip), pm_comps_notlowest))
				mutex_exit(&pm_compcnt_lock);
			}
			PM_UNLOCK_POWER(dip, circ);
		}
	}
	/*
	 * Compute the total number of transitions for all components
	 * of the device.  Distribute the threshold evenly over them
	 */
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		ASSERT(pmc->pmc_numlevels > 1);
		transitions += pmc->pmc_numlevels - 1;
	}
	/* NOTE(review): only the ASSERT guards the division below */
	ASSERT(transitions);
	thresh = target_threshold / transitions;

	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			pmc->pmc_thresh[level] = thresh;
		}
	}

#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	/*
	 * Distribute any remainder till they are all gone
	 */
	remainder = target_threshold - thresh * transitions;
	level = 1;
#ifdef DEBUG
	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
	    transitions))
#endif
	/*
	 * Hand out one extra unit per component at the current level, then
	 * move up a level, until the remainder is used up.
	 */
	while (remainder > 0) {
		comp = 0;
		while (remainder && (comp < ncomp)) {
			pmc = &PM_CP(dip, comp)->pmc_comp;
			if (level < pmc->pmc_numlevels) {
				pmc->pmc_thresh[level] += 1;
				remainder--;
			}
			comp++;
		}
		level++;
	}
#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	ASSERT(PM_IAM_LOCKING_DIP(dip));
	/* record the un-scaled base and replace the threshold-type bits */
	DEVI(dip)->devi_pm_dev_thresh = base;
	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
	DEVI(dip)->devi_pm_flags |= flag;
	PM_UNLOCK_DIP(dip);
}
4341 
4342 /*
4343  * Called when there is no old-style platform power management driver
4344  */
4345 static int
4346 ddi_no_platform_power(power_req_t *req)
4347 {
4348 	_NOTE(ARGUNUSED(req))
4349 	return (DDI_FAILURE);
4350 }
4351 
/*
 * Entry point supplied by the platform-specific pm driver to bring the
 * device component 'pm_cmpt' to power level 'pm_level'.  Until such a
 * driver supplies one, it points at ddi_no_platform_power().
 * Using a global to obtain the function from the platform-specific
 * pm driver is not ideal, but it is simple and efficient.
 * The previous property lookup was being done in the idle loop on swift
 * systems without pmc chips and hurt deskbench performance as well as
 * violating scheduler locking rules.
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4362 
4363 /*
4364  * Old obsolete interface for a device to request a power change (but only
4365  * an increase in power)
4366  */
4367 int
4368 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4369 {
4370 	return (pm_raise_power(dip, cmpt, level));
4371 }
4372 
4373 /*
4374  * The old obsolete interface to platform power management.  Only used by
4375  * Gypsy platform and APM on X86.
4376  */
4377 int
4378 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4379 {
4380 	power_req_t	request;
4381 
4382 	request.request_type = PMR_SET_POWER;
4383 	request.req.set_power_req.who = dip;
4384 	request.req.set_power_req.cmpt = pm_cmpt;
4385 	request.req.set_power_req.level = pm_level;
4386 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4387 }
4388 
4389 /*
4390  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4391  * passed.  Returns true if subsequent processing could result in power being
4392  * removed from the device.  The arg is not currently used because it is
4393  * implicit in the operation of cpr/DR.
4394  */
4395 int
4396 ddi_removing_power(dev_info_t *dip)
4397 {
4398 	_NOTE(ARGUNUSED(dip))
4399 	return (pm_powering_down);
4400 }
4401 
4402 /*
4403  * Returns true if a device indicates that its parent handles suspend/resume
4404  * processing for it.
4405  */
4406 int
4407 e_ddi_parental_suspend_resume(dev_info_t *dip)
4408 {
4409 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4410 }
4411 
4412 /*
4413  * Called for devices which indicate that their parent does suspend/resume
4414  * handling for them
4415  */
4416 int
4417 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4418 {
4419 	power_req_t	request;
4420 	request.request_type = PMR_SUSPEND;
4421 	request.req.suspend_req.who = dip;
4422 	request.req.suspend_req.cmd = cmd;
4423 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4424 }
4425 
4426 /*
4427  * Called for devices which indicate that their parent does suspend/resume
4428  * handling for them
4429  */
4430 int
4431 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4432 {
4433 	power_req_t	request;
4434 	request.request_type = PMR_RESUME;
4435 	request.req.resume_req.who = dip;
4436 	request.req.resume_req.cmd = cmd;
4437 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4438 }
4439 
4440 /*
4441  * Old obsolete exported interface for drivers to create components.
4442  * This is now handled by exporting the pm-components property.
4443  */
4444 int
4445 pm_create_components(dev_info_t *dip, int num_components)
4446 {
4447 	PMD_FUNC(pmf, "pm_create_components")
4448 
4449 	if (num_components < 1)
4450 		return (DDI_FAILURE);
4451 
4452 	if (!DEVI_IS_ATTACHING(dip)) {
4453 		return (DDI_FAILURE);
4454 	}
4455 
4456 	/* don't need to lock dip because attach is single threaded */
4457 	if (DEVI(dip)->devi_pm_components) {
4458 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4459 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4460 		return (DDI_FAILURE);
4461 	}
4462 	e_pm_create_components(dip, num_components);
4463 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4464 	e_pm_default_components(dip, num_components);
4465 	return (DDI_SUCCESS);
4466 }
4467 
4468 /*
4469  * Obsolete interface previously called by drivers to destroy their components
4470  * at detach time.  This is now done automatically.  However, we need to keep
4471  * this for the old drivers.
4472  */
4473 void
4474 pm_destroy_components(dev_info_t *dip)
4475 {
4476 	PMD_FUNC(pmf, "pm_destroy_components")
4477 	dev_info_t *pdip = ddi_get_parent(dip);
4478 
4479 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4480 	    PM_DEVICE(dip)))
4481 	ASSERT(DEVI_IS_DETACHING(dip));
4482 #ifdef DEBUG
4483 	if (!PM_ISBC(dip))
4484 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4485 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4486 		    PM_ADDR(dip));
4487 #endif
4488 	/*
4489 	 * We ignore this unless this is an old-style driver, except for
4490 	 * printing the message above
4491 	 */
4492 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4493 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4494 		    PM_DEVICE(dip)))
4495 		return;
4496 	}
4497 	ASSERT(PM_GET_PM_INFO(dip));
4498 
4499 	/*
4500 	 * pm_unmanage will clear info pointer later, after dealing with
4501 	 * dependencies
4502 	 */
4503 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4504 	/*
4505 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4506 	 * Parents that get notification are not adjusted because their
4507 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4508 	 */
4509 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4510 		pm_rele_power(pdip);
4511 #ifdef DEBUG
4512 	else {
4513 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4514 		    pmf, PM_DEVICE(dip)))
4515 	}
4516 #endif
4517 	e_pm_destroy_components(dip);
4518 	/*
4519 	 * Forget we ever knew anything about the components of this  device
4520 	 */
4521 	DEVI(dip)->devi_pm_flags &=
4522 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4523 }
4524 
4525 /*
4526  * Exported interface for a driver to set a component busy.
4527  */
4528 int
4529 pm_busy_component(dev_info_t *dip, int cmpt)
4530 {
4531 	struct pm_component *cp;
4532 
4533 	ASSERT(dip != NULL);
4534 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4535 		return (DDI_FAILURE);
4536 	PM_LOCK_BUSY(dip);
4537 	cp->pmc_busycount++;
4538 	cp->pmc_timestamp = 0;
4539 	PM_UNLOCK_BUSY(dip);
4540 	return (DDI_SUCCESS);
4541 }
4542 
4543 /*
4544  * Exported interface for a driver to set a component idle.
4545  */
4546 int
4547 pm_idle_component(dev_info_t *dip, int cmpt)
4548 {
4549 	PMD_FUNC(pmf, "pm_idle_component")
4550 	struct pm_component *cp;
4551 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4552 
4553 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4554 		return (DDI_FAILURE);
4555 
4556 	PM_LOCK_BUSY(dip);
4557 	if (cp->pmc_busycount) {
4558 		if (--(cp->pmc_busycount) == 0)
4559 			cp->pmc_timestamp = gethrestime_sec();
4560 	} else {
4561 		cp->pmc_timestamp = gethrestime_sec();
4562 	}
4563 
4564 	PM_UNLOCK_BUSY(dip);
4565 
4566 	/*
4567 	 * if device becomes idle during idle down period, try scan it down
4568 	 */
4569 	if (scanp && PM_IS_PID(dip)) {
4570 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4571 		    PM_DEVICE(dip)))
4572 		pm_rescan(dip);
4573 		return (DDI_SUCCESS);
4574 	}
4575 
4576 	/*
4577 	 * handle scan not running with nexus threshold == 0
4578 	 */
4579 
4580 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4581 		pm_rescan(dip);
4582 	}
4583 
4584 	return (DDI_SUCCESS);
4585 }
4586 
4587 /*
4588  * This is the old  obsolete interface called by drivers to set their normal
4589  * power.  Thus we can't fix its behavior or return a value.
4590  * This functionality is replaced by the pm-component property.
4591  * We'll only get components destroyed while no power management is
4592  * going on (and the device is detached), so we don't need a mutex here
4593  */
4594 void
4595 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4596 {
4597 	PMD_FUNC(pmf, "set_normal_power")
4598 #ifdef DEBUG
4599 	if (!PM_ISBC(dip))
4600 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4601 		    "(driver exporting pm-components property) ignored",
4602 		    PM_NAME(dip), PM_ADDR(dip));
4603 #endif
4604 	if (PM_ISBC(dip)) {
4605 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4606 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4607 		e_pm_set_max_power(dip, comp, level);
4608 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4609 	}
4610 }
4611 
4612 /*
4613  * Called on a successfully detached driver to free pm resources
4614  */
4615 static void
4616 pm_stop(dev_info_t *dip)
4617 {
4618 	PMD_FUNC(pmf, "stop")
4619 	dev_info_t *pdip = ddi_get_parent(dip);
4620 
4621 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4622 	/* stopping scan, destroy scan data structure */
4623 	if (!PM_ISBC(dip)) {
4624 		pm_scan_stop(dip);
4625 		pm_scan_fini(dip);
4626 	}
4627 
4628 	if (PM_GET_PM_INFO(dip) != NULL) {
4629 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4630 			/*
4631 			 * Old style driver may have called
4632 			 * pm_destroy_components already, but just in case ...
4633 			 */
4634 			e_pm_destroy_components(dip);
4635 		} else {
4636 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4637 			    pmf, PM_DEVICE(dip)))
4638 		}
4639 	} else {
4640 		if (PM_NUMCMPTS(dip))
4641 			e_pm_destroy_components(dip);
4642 		else {
4643 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4644 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4645 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4646 					pm_rele_power(pdip);
4647 				} else if (pdip &&
4648 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4649 					(void) mdi_power(pdip,
4650 					    MDI_PM_RELE_POWER,
4651 					    (void *)dip, NULL, 0);
4652 				}
4653 			}
4654 		}
4655 	}
4656 }
4657 
4658 /*
4659  * The node is the subject of a reparse pm props ioctl. Throw away the old
4660  * info and start over.
4661  */
4662 int
4663 e_new_pm_props(dev_info_t *dip)
4664 {
4665 	if (PM_GET_PM_INFO(dip) != NULL) {
4666 		pm_stop(dip);
4667 
4668 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4669 			return (DDI_FAILURE);
4670 		}
4671 	}
4672 	e_pm_props(dip);
4673 	return (DDI_SUCCESS);
4674 }
4675 
4676 /*
4677  * Device has been attached, so process its pm properties
4678  */
4679 void
4680 e_pm_props(dev_info_t *dip)
4681 {
4682 	char *pp;
4683 	int len;
4684 	int flags = 0;
4685 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4686 
4687 	/*
4688 	 * It doesn't matter if we do this more than once, we should always
4689 	 * get the same answers, and if not, then the last one in is the
4690 	 * best one.
4691 	 */
4692 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4693 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4694 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4695 			flags = PMC_NEEDS_SR;
4696 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4697 			flags = PMC_NO_SR;
4698 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4699 			flags = PMC_PARENTAL_SR;
4700 		} else {
4701 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4702 			    "%s property value '%s'", PM_NAME(dip),
4703 			    PM_ADDR(dip), "pm-hardware-state", pp);
4704 		}
4705 		kmem_free(pp, len);
4706 	}
4707 	/*
4708 	 * This next segment (PMC_WANTS_NOTIFY) is in
4709 	 * support of nexus drivers which will want to be involved in
4710 	 * (or at least notified of) their child node's power level transitions.
4711 	 * "pm-want-child-notification?" is defined by the parent.
4712 	 */
4713 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4714 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4715 		flags |= PMC_WANTS_NOTIFY;
4716 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4717 	    dip, propflag, "pm-want-child-notification?"));
4718 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4719 	    "no-involuntary-power-cycles"))
4720 		flags |= PMC_NO_INVOL;
4721 	/*
4722 	 * Is the device a CPU device?
4723 	 */
4724 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4725 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4726 		if (strcmp(pp, "CPU") == 0) {
4727 			flags |= PMC_CPU_DEVICE;
4728 		} else {
4729 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4730 			    "%s property value '%s'", PM_NAME(dip),
4731 			    PM_ADDR(dip), "pm-class", pp);
4732 		}
4733 		kmem_free(pp, len);
4734 	}
4735 	/* devfs single threads us */
4736 	DEVI(dip)->devi_pm_flags |= flags;
4737 }
4738 
/*
 * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
 * driver which has claimed a node.
 * Sets old_power in arg struct.
 *
 * dip	  expected to be NULL (asserted below); otherwise unused
 * rdip	  the device the request is about
 * ctlop  only DDI_CTLOPS_POWER is handled; anything else fails
 * arg	  the power_req_t describing the request
 * result written only for PMR_PPM_TRY_LOCK_POWER, where it receives the
 *	  int returned by pm_try_locking_power_single()
 *
 * Returns DDI_FAILURE for an unknown ctlop or request type, the pm_power()
 * result for PMR_PPM_SET_POWER, and DDI_SUCCESS for every other request
 * type handled here.
 */
static int
pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result)
{
	_NOTE(ARGUNUSED(dip))
	PMD_FUNC(pmf, "ctlops")
	power_req_t *reqp = (power_req_t *)arg;
	int retval;
	dev_info_t *target_dip;
	int new_level, old_level, cmpt;
#ifdef PMDDEBUG
	char *format;
#endif

	/*
	 * The interface for doing the actual power level changes is now
	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
	 * different platform-specific power control drivers.
	 *
	 * This driver implements the "default" version of this interface.
	 * If no ppm driver has been installed then this interface is called
	 * instead.
	 */
	ASSERT(dip == NULL);
	switch (ctlop) {
	case DDI_CTLOPS_POWER:
		switch (reqp->request_type) {
		case PMR_PPM_SET_POWER:
		{
			target_dip = reqp->req.ppm_set_power_req.who;
			ASSERT(target_dip == rdip);
			new_level = reqp->req.ppm_set_power_req.new_level;
			cmpt = reqp->req.ppm_set_power_req.cmpt;
			/* pass back old power for the PM_LEVEL_UNKNOWN case */
			old_level = PM_CURPOWER(target_dip, cmpt);
			reqp->req.ppm_set_power_req.old_level = old_level;
			retval = pm_power(target_dip, cmpt, new_level);
			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
			    old_level, new_level, (retval == DDI_SUCCESS ?
			    "chd" : "no chg")))
			return (retval);
		}

		/*
		 * Configuration-phase notifications: nothing to do here
		 * beyond (optionally) logging them.
		 */
		case PMR_PPM_PRE_DETACH:
		case PMR_PPM_POST_DETACH:
		case PMR_PPM_PRE_ATTACH:
		case PMR_PPM_POST_ATTACH:
		case PMR_PPM_PRE_PROBE:
		case PMR_PPM_POST_PROBE:
		case PMR_PPM_PRE_RESUME:
		case PMR_PPM_INIT_CHILD:
		case PMR_PPM_UNINIT_CHILD:
#ifdef PMDDEBUG
			/* pick the log format matching the request type */
			switch (reqp->request_type) {
				case PMR_PPM_PRE_DETACH:
					format = "%s: PMR_PPM_PRE_DETACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_DETACH:
					format = "%s: PMR_PPM_POST_DETACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_ATTACH:
					format = "%s: PMR_PPM_PRE_ATTACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_ATTACH:
					format = "%s: PMR_PPM_POST_ATTACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_PROBE:
					format = "%s: PMR_PPM_PRE_PROBE "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_PROBE:
					format = "%s: PMR_PPM_POST_PROBE "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_RESUME:
					format = "%s: PMR_PPM_PRE_RESUME "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_INIT_CHILD:
					format = "%s: PMR_PPM_INIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_UNINIT_CHILD:
					format = "%s: PMR_PPM_UNINIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				default:
					/*
					 * Not reachable from the enclosing
					 * case labels; format would be left
					 * unset if it ever were.
					 */
					break;
			}
			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
			    reqp->req.ppm_config_req.result))
#endif
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_CHANGE_NOTIFY:
			/*
			 * Nothing for us to do
			 */
			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
			    reqp->req.ppm_notify_level_req.cmpt,
			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
			    reqp->req.ppm_notify_level_req.cmpt),
			    reqp->req.ppm_notify_level_req.new_level))
			return (DDI_SUCCESS);

		case PMR_PPM_UNMANAGE:
			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(rdip)))
			return (DDI_SUCCESS);

		/* power lock requests map onto the single-node lock helpers */
		case PMR_PPM_LOCK_POWER:
			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_UNLOCK_POWER:
			pm_unlock_power_single(
			    reqp->req.ppm_unlock_power_req.who,
			    reqp->req.ppm_unlock_power_req.circ);
			return (DDI_SUCCESS);

		case PMR_PPM_TRY_LOCK_POWER:
			/* the try-lock outcome goes back through *result */
			*(int *)result = pm_try_locking_power_single(
			    reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_LOCK_OWNER:
			target_dip = reqp->req.ppm_power_lock_owner_req.who;
			ASSERT(target_dip == rdip);
			/* report the thread recorded in devi_busy_thread */
			reqp->req.ppm_power_lock_owner_req.owner =
			    DEVI(rdip)->devi_busy_thread;
			return (DDI_SUCCESS);
		default:
			PMD(PMD_ERROR, ("%s: default!\n", pmf))
			return (DDI_FAILURE);
		}

	default:
		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
		return (DDI_FAILURE);
	}
}
4895 
4896 /*
4897  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4898  * power_ops struct for this functionality instead?
4899  * However, we only ever do this on a ppm driver.
4900  */
4901 int
4902 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4903 {
4904 	int (*fp)();
4905 
4906 	/* if no ppm handler, call the default routine */
4907 	if (d == NULL) {
4908 		return (pm_default_ctlops(d, r, op, a, v));
4909 	}
4910 	if (!d || !r)
4911 		return (DDI_FAILURE);
4912 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4913 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4914 
4915 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4916 	return ((*fp)(d, r, op, a, v));
4917 }
4918 
4919 /*
4920  * Called on a node when attach completes or the driver makes its first pm
4921  * call (whichever comes first).
4922  * In the attach case, device may not be power manageable at all.
4923  * Don't need to lock the dip because we're single threaded by the devfs code
4924  */
4925 static int
4926 pm_start(dev_info_t *dip)
4927 {
4928 	PMD_FUNC(pmf, "start")
4929 	int ret;
4930 	dev_info_t *pdip = ddi_get_parent(dip);
4931 	int e_pm_manage(dev_info_t *, int);
4932 	void pm_noinvol_specd(dev_info_t *dip);
4933 
4934 	e_pm_props(dip);
4935 	pm_noinvol_specd(dip);
4936 	/*
4937 	 * If this dip has already been processed, don't mess with it
4938 	 * (but decrement the speculative count we did above, as whatever
4939 	 * code put it under pm already will have dealt with it)
4940 	 */
4941 	if (PM_GET_PM_INFO(dip)) {
4942 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4943 		    pmf, PM_DEVICE(dip)))
4944 		return (0);
4945 	}
4946 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4947 
4948 	if (PM_GET_PM_INFO(dip) == NULL) {
4949 		/*
4950 		 * keep the kidsupcount increment as is
4951 		 */
4952 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4953 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4954 			pm_hold_power(pdip);
4955 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4956 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4957 			    (void *)dip, NULL, 0);
4958 		}
4959 
4960 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4961 		    "left up\n", pmf, PM_DEVICE(dip)))
4962 	}
4963 
4964 	return (ret);
4965 }
4966 
4967 /*
4968  * Keep a list of recorded thresholds.  For now we just keep a list and
4969  * search it linearly.  We don't expect too many entries.  Can always hash it
4970  * later if we need to.
4971  */
4972 void
4973 pm_record_thresh(pm_thresh_rec_t *rp)
4974 {
4975 	pm_thresh_rec_t *pptr, *ptr;
4976 
4977 	ASSERT(*rp->ptr_physpath);
4978 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4979 	for (pptr = NULL, ptr = pm_thresh_head;
4980 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4981 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4982 			/* replace this one */
4983 			rp->ptr_next = ptr->ptr_next;
4984 			if (pptr) {
4985 				pptr->ptr_next = rp;
4986 			} else {
4987 				pm_thresh_head = rp;
4988 			}
4989 			rw_exit(&pm_thresh_rwlock);
4990 			kmem_free(ptr, ptr->ptr_size);
4991 			return;
4992 		}
4993 		continue;
4994 	}
4995 	/*
4996 	 * There was not a match in the list, insert this one in front
4997 	 */
4998 	if (pm_thresh_head) {
4999 		rp->ptr_next = pm_thresh_head;
5000 		pm_thresh_head = rp;
5001 	} else {
5002 		rp->ptr_next = NULL;
5003 		pm_thresh_head = rp;
5004 	}
5005 	rw_exit(&pm_thresh_rwlock);
5006 }
5007 
5008 /*
5009  * Create a new dependency record and hang a new dependency entry off of it
5010  */
5011 pm_pdr_t *
5012 newpdr(char *kept, char *keeps, int isprop)
5013 {
5014 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5015 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5016 	p->pdr_size = size;
5017 	p->pdr_isprop = isprop;
5018 	p->pdr_kept_paths = NULL;
5019 	p->pdr_kept_count = 0;
5020 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5021 	(void) strcpy(p->pdr_kept, kept);
5022 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5023 	(void) strcpy(p->pdr_keeper, keeps);
5024 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5025 	    (intptr_t)p + size);
5026 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5027 	    (intptr_t)p + size);
5028 	return (p);
5029 }
5030 
5031 /*
5032  * Keep a list of recorded dependencies.  We only keep the
5033  * keeper -> kept list for simplification. At this point We do not
5034  * care about whether the devices are attached or not yet,
5035  * this would be done in pm_keeper() and pm_kept().
5036  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5037  * and it is up to the user to issue the ioctl again if they want it
5038  * (e.g. pmconfig)
5039  * Returns true if dependency already exists in the list.
5040  */
5041 int
5042 pm_record_keeper(char *kept, char *keeper, int isprop)
5043 {
5044 	PMD_FUNC(pmf, "record_keeper")
5045 	pm_pdr_t *npdr, *ppdr, *pdr;
5046 
5047 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5048 	ASSERT(kept && keeper);
5049 #ifdef DEBUG
5050 	if (pm_debug & PMD_KEEPS)
5051 		prdeps("pm_record_keeper entry");
5052 #endif
5053 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5054 	    ppdr = pdr, pdr = pdr->pdr_next) {
5055 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5056 		    pdr->pdr_keeper))
5057 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5058 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5059 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5060 			return (1);
5061 		}
5062 	}
5063 	/*
5064 	 * We did not find any match, so we have to make an entry
5065 	 */
5066 	npdr = newpdr(kept, keeper, isprop);
5067 	if (ppdr) {
5068 		ASSERT(ppdr->pdr_next == NULL);
5069 		ppdr->pdr_next = npdr;
5070 	} else {
5071 		ASSERT(pm_dep_head == NULL);
5072 		pm_dep_head = npdr;
5073 	}
5074 #ifdef DEBUG
5075 	if (pm_debug & PMD_KEEPS)
5076 		prdeps("pm_record_keeper after new record");
5077 #endif
5078 	if (!isprop)
5079 		pm_unresolved_deps++;
5080 	else
5081 		pm_prop_deps++;
5082 	return (0);
5083 }
5084 
5085 /*
5086  * Look up this device in the set of devices we've seen ioctls for
5087  * to see if we are holding a threshold spec for it.  If so, make it so.
5088  * At ioctl time, we were given the physical path of the device.
5089  */
5090 int
5091 pm_thresh_specd(dev_info_t *dip)
5092 {
5093 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5094 	char *path = 0;
5095 	char pathbuf[MAXNAMELEN];
5096 	pm_thresh_rec_t *rp;
5097 
5098 	path = ddi_pathname(dip, pathbuf);
5099 
5100 	rw_enter(&pm_thresh_rwlock, RW_READER);
5101 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5102 		if (strcmp(rp->ptr_physpath, path) != 0)
5103 			continue;
5104 		pm_apply_recorded_thresh(dip, rp);
5105 		rw_exit(&pm_thresh_rwlock);
5106 		return (1);
5107 	}
5108 	rw_exit(&pm_thresh_rwlock);
5109 	return (0);
5110 }
5111 
5112 static int
5113 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5114 {
5115 	PMD_FUNC(pmf, "set_keeping")
5116 	pm_info_t *kept_info;
5117 	int j, up = 0, circ;
5118 	void prdeps(char *);
5119 
5120 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5121 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5122 #ifdef DEBUG
5123 	if (pm_debug & PMD_KEEPS)
5124 		prdeps("Before PAD\n");
5125 #endif
5126 	ASSERT(keeper != kept);
5127 	if (PM_GET_PM_INFO(keeper) == NULL) {
5128 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5129 		    "%s@%s(%s#%d), but the latter is not power managed",
5130 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5131 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5132 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5133 		return (0);
5134 	}
5135 	kept_info = PM_GET_PM_INFO(kept);
5136 	ASSERT(kept_info);
5137 	PM_LOCK_POWER(keeper, &circ);
5138 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5139 		if (PM_CURPOWER(keeper, j)) {
5140 			up++;
5141 			break;
5142 		}
5143 	}
5144 	if (up) {
5145 		/* Bringup and maintain a hold on the kept */
5146 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5147 		    PM_DEVICE(kept)))
5148 		bring_pmdep_up(kept, 1);
5149 	}
5150 	PM_UNLOCK_POWER(keeper, circ);
5151 #ifdef DEBUG
5152 	if (pm_debug & PMD_KEEPS)
5153 		prdeps("After PAD\n");
5154 #endif
5155 	return (1);
5156 }
5157 
5158 /*
5159  * Should this device keep up another device?
5160  * Look up this device in the set of devices we've seen ioctls for
5161  * to see if we are holding a dependency spec for it.  If so, make it so.
5162  * Because we require the kept device to be attached already in order to
5163  * make the list entry (and hold it), we only need to look for keepers.
5164  * At ioctl time, we were given the physical path of the device.
5165  */
5166 int
5167 pm_keeper(char *keeper)
5168 {
5169 	PMD_FUNC(pmf, "keeper")
5170 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5171 	dev_info_t *dip;
5172 	pm_pdr_t *dp;
5173 	dev_info_t *kept = NULL;
5174 	int ret = 0;
5175 	int i;
5176 
5177 	if (!pm_unresolved_deps && !pm_prop_deps)
5178 		return (0);
5179 	ASSERT(keeper != NULL);
5180 	dip = pm_name_to_dip(keeper, 1);
5181 	if (dip == NULL)
5182 		return (0);
5183 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5184 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5185 		if (!dp->pdr_isprop) {
5186 			if (!pm_unresolved_deps)
5187 				continue;
5188 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5189 			if (dp->pdr_satisfied) {
5190 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5191 				continue;
5192 			}
5193 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5194 				ret += pm_apply_recorded_dep(dip, dp);
5195 			}
5196 		} else {
5197 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5198 				continue;
5199 			for (i = 0; i < dp->pdr_kept_count; i++) {
5200 				if (dp->pdr_kept_paths[i] == NULL)
5201 					continue;
5202 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5203 				if (kept == NULL)
5204 					continue;
5205 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5206 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5207 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5208 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5209 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5210 				    dp->pdr_kept_count))
5211 				if (kept != dip) {
5212 					ret += pm_set_keeping(dip, kept);
5213 				}
5214 				ddi_release_devi(kept);
5215 			}
5216 
5217 		}
5218 	}
5219 	ddi_release_devi(dip);
5220 	return (ret);
5221 }
5222 
5223 /*
5224  * Should this device be kept up by another device?
5225  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5226  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5227  * kept device lists.
5228  */
5229 static int
5230 pm_kept(char *keptp)
5231 {
5232 	PMD_FUNC(pmf, "kept")
5233 	pm_pdr_t *dp;
5234 	int found = 0;
5235 	int ret = 0;
5236 	dev_info_t *keeper;
5237 	dev_info_t *kept;
5238 	size_t length;
5239 	int i;
5240 	char **paths;
5241 	char *path;
5242 
5243 	ASSERT(keptp != NULL);
5244 	kept = pm_name_to_dip(keptp, 1);
5245 	if (kept == NULL)
5246 		return (0);
5247 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5248 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5249 		if (dp->pdr_isprop) {
5250 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5251 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5252 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5253 				/*
5254 				 * Dont allow self dependency.
5255 				 */
5256 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5257 					continue;
5258 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5259 				if (keeper == NULL)
5260 					continue;
5261 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5262 				    "%p\n", pmf, (void *)kept))
5263 #ifdef DEBUG
5264 				if (pm_debug & PMD_DEP)
5265 					prdeps("Before Adding from pm_kept\n");
5266 #endif
5267 				/*
5268 				 * Add ourselves to the dip list.
5269 				 */
5270 				if (dp->pdr_kept_count == 0) {
5271 					length = strlen(keptp) + 1;
5272 					path =
5273 					    kmem_alloc(length, KM_SLEEP);
5274 					paths = kmem_alloc(sizeof (char **),
5275 					    KM_SLEEP);
5276 					(void) strcpy(path, keptp);
5277 					paths[0] = path;
5278 					dp->pdr_kept_paths = paths;
5279 					dp->pdr_kept_count++;
5280 				} else {
5281 					/* Check to see if already on list */
5282 					for (i = 0; i < dp->pdr_kept_count;
5283 					    i++) {
5284 						if (strcmp(keptp,
5285 						    dp->pdr_kept_paths[i])
5286 						    == 0) {
5287 							found++;
5288 							break;
5289 						}
5290 					}
5291 					if (found) {
5292 						ddi_release_devi(keeper);
5293 						continue;
5294 					}
5295 					length = dp->pdr_kept_count *
5296 					    sizeof (char **);
5297 					paths = kmem_alloc(
5298 					    length + sizeof (char **),
5299 					    KM_SLEEP);
5300 					if (dp->pdr_kept_count) {
5301 						bcopy(dp->pdr_kept_paths,
5302 						    paths, length);
5303 						kmem_free(dp->pdr_kept_paths,
5304 						    length);
5305 					}
5306 					dp->pdr_kept_paths = paths;
5307 					length = strlen(keptp) + 1;
5308 					path =
5309 					    kmem_alloc(length, KM_SLEEP);
5310 					(void) strcpy(path, keptp);
5311 					dp->pdr_kept_paths[i] = path;
5312 					dp->pdr_kept_count++;
5313 				}
5314 #ifdef DEBUG
5315 				if (pm_debug & PMD_DEP)
5316 					prdeps("After from pm_kept\n");
5317 #endif
5318 				if (keeper) {
5319 					ret += pm_set_keeping(keeper, kept);
5320 					ddi_release_devi(keeper);
5321 				}
5322 			}
5323 		} else {
5324 			/*
5325 			 * pm_keeper would be called later to do
5326 			 * the actual pm_set_keeping.
5327 			 */
5328 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5329 			    pmf, (void *)kept))
5330 #ifdef DEBUG
5331 			if (pm_debug & PMD_DEP)
5332 				prdeps("Before Adding from pm_kept\n");
5333 #endif
5334 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5335 				if (dp->pdr_kept_paths == NULL) {
5336 					length = strlen(keptp) + 1;
5337 					path =
5338 					    kmem_alloc(length, KM_SLEEP);
5339 					paths = kmem_alloc(sizeof (char **),
5340 					    KM_SLEEP);
5341 					(void) strcpy(path, keptp);
5342 					paths[0] = path;
5343 					dp->pdr_kept_paths = paths;
5344 					dp->pdr_kept_count++;
5345 				}
5346 			}
5347 #ifdef DEBUG
5348 			if (pm_debug & PMD_DEP)
5349 				prdeps("After from pm_kept\n");
5350 #endif
5351 		}
5352 	}
5353 	ddi_release_devi(kept);
5354 	return (ret);
5355 }
5356 
5357 /*
5358  * Apply a recorded dependency.  dp specifies the dependency, and
5359  * keeper is already known to be the device that keeps up the other (kept) one.
5360  * We have to the whole tree for the "kept" device, then apply
5361  * the dependency (which may already be applied).
5362  */
5363 int
5364 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5365 {
5366 	PMD_FUNC(pmf, "apply_recorded_dep")
5367 	dev_info_t *kept = NULL;
5368 	int ret = 0;
5369 	char *keptp = NULL;
5370 
5371 	/*
5372 	 * Device to Device dependency can only be 1 to 1.
5373 	 */
5374 	if (dp->pdr_kept_paths == NULL)
5375 		return (0);
5376 	keptp = dp->pdr_kept_paths[0];
5377 	if (keptp == NULL)
5378 		return (0);
5379 	ASSERT(*keptp != '\0');
5380 	kept = pm_name_to_dip(keptp, 1);
5381 	if (kept == NULL)
5382 		return (0);
5383 	if (kept) {
5384 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5385 		    dp->pdr_keeper, keptp))
5386 		if (pm_set_keeping(keeper, kept)) {
5387 			ASSERT(dp->pdr_satisfied == 0);
5388 			dp->pdr_satisfied = 1;
5389 			ASSERT(pm_unresolved_deps);
5390 			pm_unresolved_deps--;
5391 			ret++;
5392 		}
5393 	}
5394 	ddi_release_devi(kept);
5395 
5396 	return (ret);
5397 }
5398 
5399 /*
5400  * Called from common/io/pm.c
5401  */
5402 int
5403 pm_cur_power(pm_component_t *cp)
5404 {
5405 	return (cur_power(cp));
5406 }
5407 
5408 /*
5409  * External interface to sanity-check a power level.
5410  */
5411 int
5412 pm_valid_power(dev_info_t *dip, int comp, int level)
5413 {
5414 	PMD_FUNC(pmf, "valid_power")
5415 
5416 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5417 		return (e_pm_valid_power(dip, comp, level));
5418 	else {
5419 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5420 		    pmf, comp, PM_NUMCMPTS(dip), level))
5421 		return (0);
5422 	}
5423 }
5424 
5425 /*
5426  * Called when a device that is direct power managed needs to change state.
5427  * This routine arranges to block the request until the process managing
5428  * the device makes the change (or some other incompatible change) or
5429  * the process closes /dev/pm.
5430  */
5431 static int
5432 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5433 {
5434 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5435 	int ret = 0;
5436 	void pm_dequeue_blocked(pm_rsvp_t *);
5437 	void pm_enqueue_blocked(pm_rsvp_t *);
5438 
5439 	ASSERT(!pm_processes_stopped);
5440 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5441 	new->pr_dip = dip;
5442 	new->pr_comp = comp;
5443 	new->pr_newlevel = newpower;
5444 	new->pr_oldlevel = oldpower;
5445 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5446 	mutex_enter(&pm_rsvp_lock);
5447 	pm_enqueue_blocked(new);
5448 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5449 	    PM_CANBLOCK_BLOCK);
5450 	PM_UNLOCK_DIP(dip);
5451 	/*
5452 	 * truss may make the cv_wait_sig return prematurely
5453 	 */
5454 	while (ret == 0) {
5455 		/*
5456 		 * Normally there will be no user context involved, but if
5457 		 * there is (e.g. we are here via an ioctl call to a driver)
5458 		 * then we should allow the process to abort the request,
5459 		 * or we get an unkillable process if the same thread does
5460 		 * PM_DIRECT_PM and pm_raise_power
5461 		 */
5462 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5463 			ret = PMP_FAIL;
5464 		} else {
5465 			ret = new->pr_retval;
5466 		}
5467 	}
5468 	pm_dequeue_blocked(new);
5469 	mutex_exit(&pm_rsvp_lock);
5470 	cv_destroy(&new->pr_cv);
5471 	kmem_free(new, sizeof (*new));
5472 	return (ret);
5473 }
5474 
5475 /*
5476  * Returns true if the process is interested in power level changes (has issued
5477  * PM_GET_STATE_CHANGE ioctl).
5478  */
5479 int
5480 pm_interest_registered(int clone)
5481 {
5482 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5483 	return (pm_interest[clone]);
5484 }
5485 
5486 /*
5487  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5488  * watch all state transitions (dip == NULL).  Set up data
5489  * structs to communicate with process about state changes.
5490  */
5491 void
5492 pm_register_watcher(int clone, dev_info_t *dip)
5493 {
5494 	pscc_t	*p;
5495 	psce_t	*psce;
5496 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5497 
5498 	/*
5499 	 * We definitely need a control struct, then we have to search to see
5500 	 * there is already an entries struct (in the dip != NULL case).
5501 	 */
5502 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5503 	pscc->pscc_clone = clone;
5504 	pscc->pscc_dip = dip;
5505 
5506 	if (dip) {
5507 		int found = 0;
5508 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5509 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5510 			/*
5511 			 * Already an entry for this clone, so just use it
5512 			 * for the new one (for the case where a single
5513 			 * process is watching multiple devices)
5514 			 */
5515 			if (p->pscc_clone == clone) {
5516 				ASSERT(p->pscc_dip != dip);
5517 				pscc->pscc_entries = p->pscc_entries;
5518 				pscc->pscc_entries->psce_references++;
5519 				found++;
5520 			}
5521 		}
5522 		if (!found) {		/* create a new one */
5523 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5524 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5525 			psce->psce_first =
5526 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5527 			    KM_SLEEP);
5528 			psce->psce_in = psce->psce_out = psce->psce_first;
5529 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5530 			psce->psce_references = 1;
5531 			pscc->pscc_entries = psce;
5532 		}
5533 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5534 		rw_exit(&pm_pscc_direct_rwlock);
5535 	} else {
5536 		ASSERT(!pm_interest_registered(clone));
5537 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5538 #ifdef DEBUG
5539 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5540 			/*
5541 			 * Should not be an entry for this clone!
5542 			 */
5543 			ASSERT(p->pscc_clone != clone);
5544 		}
5545 #endif
5546 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5547 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5548 		    PSCCOUNT, KM_SLEEP);
5549 		psce->psce_in = psce->psce_out = psce->psce_first;
5550 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5551 		psce->psce_references = 1;
5552 		pscc->pscc_entries = psce;
5553 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5554 		pm_interest[clone] = 1;
5555 		rw_exit(&pm_pscc_interest_rwlock);
5556 	}
5557 }
5558 
5559 /*
5560  * Remove the given entry from the blocked list
5561  */
5562 void
5563 pm_dequeue_blocked(pm_rsvp_t *p)
5564 {
5565 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5566 	if (pm_blocked_list == p) {
5567 		ASSERT(p->pr_prev == NULL);
5568 		if (p->pr_next != NULL)
5569 			p->pr_next->pr_prev = NULL;
5570 		pm_blocked_list = p->pr_next;
5571 	} else {
5572 		ASSERT(p->pr_prev != NULL);
5573 		p->pr_prev->pr_next = p->pr_next;
5574 		if (p->pr_next != NULL)
5575 			p->pr_next->pr_prev = p->pr_prev;
5576 	}
5577 }
5578 
5579 /*
5580  * Remove the given control struct from the given list
5581  */
5582 static void
5583 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5584 {
5585 	if (*list == p) {
5586 		ASSERT(p->pscc_prev == NULL);
5587 		if (p->pscc_next != NULL)
5588 			p->pscc_next->pscc_prev = NULL;
5589 		*list = p->pscc_next;
5590 	} else {
5591 		ASSERT(p->pscc_prev != NULL);
5592 		p->pscc_prev->pscc_next = p->pscc_next;
5593 		if (p->pscc_next != NULL)
5594 			p->pscc_next->pscc_prev = p->pscc_prev;
5595 	}
5596 }
5597 
5598 /*
5599  * Stick the control struct specified on the front of the list
5600  */
5601 static void
5602 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5603 {
5604 	pscc_t *h;	/* entry at head of list */
5605 	if ((h = *list) == NULL) {
5606 		*list = p;
5607 		ASSERT(p->pscc_next == NULL);
5608 		ASSERT(p->pscc_prev == NULL);
5609 	} else {
5610 		p->pscc_next = h;
5611 		ASSERT(h->pscc_prev == NULL);
5612 		h->pscc_prev = p;
5613 		ASSERT(p->pscc_prev == NULL);
5614 		*list = p;
5615 	}
5616 }
5617 
5618 /*
5619  * If dip is NULL, process is closing "clone" clean up all its registrations.
5620  * Otherwise only clean up those for dip because process is just giving up
5621  * control of a direct device.
5622  */
5623 void
5624 pm_deregister_watcher(int clone, dev_info_t *dip)
5625 {
5626 	pscc_t	*p, *pn;
5627 	psce_t	*psce;
5628 	int found = 0;
5629 
5630 	if (dip == NULL) {
5631 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5632 		for (p = pm_pscc_interest; p; p = pn) {
5633 			pn = p->pscc_next;
5634 			if (p->pscc_clone == clone) {
5635 				pm_dequeue_pscc(p, &pm_pscc_interest);
5636 				psce = p->pscc_entries;
5637 				ASSERT(psce->psce_references == 1);
5638 				mutex_destroy(&psce->psce_lock);
5639 				kmem_free(psce->psce_first,
5640 				    sizeof (pm_state_change_t) * PSCCOUNT);
5641 				kmem_free(psce, sizeof (*psce));
5642 				kmem_free(p, sizeof (*p));
5643 			}
5644 		}
5645 		pm_interest[clone] = 0;
5646 		rw_exit(&pm_pscc_interest_rwlock);
5647 	}
5648 	found = 0;
5649 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5650 	for (p = pm_pscc_direct; p; p = pn) {
5651 		pn = p->pscc_next;
5652 		if ((dip && p->pscc_dip == dip) ||
5653 		    (dip == NULL && clone == p->pscc_clone)) {
5654 			ASSERT(clone == p->pscc_clone);
5655 			found++;
5656 			/*
5657 			 * Remove from control list
5658 			 */
5659 			pm_dequeue_pscc(p, &pm_pscc_direct);
5660 			/*
5661 			 * If we're the last reference, free the
5662 			 * entries struct.
5663 			 */
5664 			psce = p->pscc_entries;
5665 			ASSERT(psce);
5666 			if (psce->psce_references == 1) {
5667 				kmem_free(psce->psce_first,
5668 				    PSCCOUNT * sizeof (pm_state_change_t));
5669 				kmem_free(psce, sizeof (*psce));
5670 			} else {
5671 				psce->psce_references--;
5672 			}
5673 			kmem_free(p, sizeof (*p));
5674 		}
5675 	}
5676 	ASSERT(dip == NULL || found);
5677 	rw_exit(&pm_pscc_direct_rwlock);
5678 }
5679 
5680 /*
5681  * Search the indicated list for an entry that matches clone, and return a
5682  * pointer to it.  To be interesting, the entry must have something ready to
5683  * be passed up to the controlling process.
5684  * The returned entry will be locked upon return from this call.
5685  */
5686 static psce_t *
5687 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5688 {
5689 	pscc_t	*p;
5690 	psce_t	*psce;
5691 	rw_enter(lock, RW_READER);
5692 	for (p = *list; p; p = p->pscc_next) {
5693 		if (clone == p->pscc_clone) {
5694 			psce = p->pscc_entries;
5695 			mutex_enter(&psce->psce_lock);
5696 			if (psce->psce_out->size) {
5697 				rw_exit(lock);
5698 				return (psce);
5699 			} else {
5700 				mutex_exit(&psce->psce_lock);
5701 			}
5702 		}
5703 	}
5704 	rw_exit(lock);
5705 	return (NULL);
5706 }
5707 
5708 /*
5709  * Find an entry for a particular clone in the direct list.
5710  */
5711 psce_t *
5712 pm_psc_clone_to_direct(int clone)
5713 {
5714 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5715 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5716 	    &pm_pscc_direct_rwlock));
5717 }
5718 
5719 /*
5720  * Find an entry for a particular clone in the interest list.
5721  */
5722 psce_t *
5723 pm_psc_clone_to_interest(int clone)
5724 {
5725 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5726 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5727 	    &pm_pscc_interest_rwlock));
5728 }
5729 
5730 /*
5731  * Put the given entry at the head of the blocked list
5732  */
5733 void
5734 pm_enqueue_blocked(pm_rsvp_t *p)
5735 {
5736 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5737 	ASSERT(p->pr_next == NULL);
5738 	ASSERT(p->pr_prev == NULL);
5739 	if (pm_blocked_list != NULL) {
5740 		p->pr_next = pm_blocked_list;
5741 		ASSERT(pm_blocked_list->pr_prev == NULL);
5742 		pm_blocked_list->pr_prev = p;
5743 		pm_blocked_list = p;
5744 	} else {
5745 		pm_blocked_list = p;
5746 	}
5747 }
5748 
5749 /*
5750  * Sets every power managed device back to its default threshold
5751  */
5752 void
5753 pm_all_to_default_thresholds(void)
5754 {
5755 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5756 	    (void *) &pm_system_idle_threshold);
5757 }
5758 
5759 static int
5760 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5761 {
5762 	int thr = (int)(*(int *)arg);
5763 
5764 	if (!PM_GET_PM_INFO(dip))
5765 		return (DDI_WALK_CONTINUE);
5766 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5767 	return (DDI_WALK_CONTINUE);
5768 }
5769 
5770 /*
5771  * Returns the current threshold value (in seconds) for the indicated component
5772  */
5773 int
5774 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5775 {
5776 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5777 		return (DDI_FAILURE);
5778 	} else {
5779 		*threshp = cur_threshold(dip, comp);
5780 		return (DDI_SUCCESS);
5781 	}
5782 }
5783 
5784 /*
5785  * To be called when changing the power level of a component of a device.
5786  * On some platforms, changing power on one device may require that power
5787  * be changed on other, related devices in the same transaction.  Thus, we
5788  * always pass this request to the platform power manager so that all the
5789  * affected devices will be locked.
5790  */
5791 void
5792 pm_lock_power(dev_info_t *dip, int *circp)
5793 {
5794 	power_req_t power_req;
5795 	int result;
5796 
5797 	power_req.request_type = PMR_PPM_LOCK_POWER;
5798 	power_req.req.ppm_lock_power_req.who = dip;
5799 	power_req.req.ppm_lock_power_req.circp = circp;
5800 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5801 }
5802 
5803 /*
5804  * Release the lock (or locks) acquired to change the power of a device.
5805  * See comments for pm_lock_power.
5806  */
5807 void
5808 pm_unlock_power(dev_info_t *dip, int circ)
5809 {
5810 	power_req_t power_req;
5811 	int result;
5812 
5813 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5814 	power_req.req.ppm_unlock_power_req.who = dip;
5815 	power_req.req.ppm_unlock_power_req.circ = circ;
5816 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5817 }
5818 
5819 
5820 /*
5821  * Attempt (without blocking) to acquire the lock(s) needed to change the
5822  * power of a component of a device.  See comments for pm_lock_power.
5823  *
5824  * Return: 1 if lock(s) acquired, 0 if not.
5825  */
5826 int
5827 pm_try_locking_power(dev_info_t *dip, int *circp)
5828 {
5829 	power_req_t power_req;
5830 	int result;
5831 
5832 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5833 	power_req.req.ppm_lock_power_req.who = dip;
5834 	power_req.req.ppm_lock_power_req.circp = circp;
5835 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5836 	return (result);
5837 }
5838 
5839 
5840 /*
5841  * Lock power state of a device.
5842  *
5843  * The implementation handles a special case where another thread may have
5844  * acquired the lock and created/launched this thread to do the work.  If
5845  * the lock cannot be acquired immediately, we check to see if this thread
5846  * is registered as a borrower of the lock.  If so, we may proceed without
5847  * the lock.  This assumes that the lending thread blocks on the completion
5848  * of this thread.
5849  *
5850  * Note 1: for use by ppm only.
5851  *
5852  * Note 2: On failing to get the lock immediately, we search lock_loan list
5853  * for curthread (as borrower of the lock).  On a hit, we check that the
5854  * lending thread already owns the lock we want.  It is safe to compare
5855  * devi_busy_thread and thread id of the lender because in the == case (the
5856  * only one we care about) we know that the owner is blocked.  Similarly,
5857  * If we find that curthread isn't registered as a lock borrower, it is safe
5858  * to use the blocking call (ndi_devi_enter) because we know that if we
5859  * weren't already listed as a borrower (upstream on the call stack) we won't
5860  * become one.
5861  */
5862 void
5863 pm_lock_power_single(dev_info_t *dip, int *circp)
5864 {
5865 	lock_loan_t *cur;
5866 
5867 	/* if the lock is available, we are done. */
5868 	if (ndi_devi_tryenter(dip, circp))
5869 		return;
5870 
5871 	mutex_enter(&pm_loan_lock);
5872 	/* see if our thread is registered as a lock borrower. */
5873 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5874 		if (cur->pmlk_borrower == curthread)
5875 			break;
5876 	mutex_exit(&pm_loan_lock);
5877 
5878 	/* if this thread not already registered, it is safe to block */
5879 	if (cur == NULL)
5880 		ndi_devi_enter(dip, circp);
5881 	else {
5882 		/* registered: does lender own the lock we want? */
5883 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5884 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5885 			cur->pmlk_dip = dip;
5886 		} else /* no: just block for it */
5887 			ndi_devi_enter(dip, circp);
5888 
5889 	}
5890 }
5891 
5892 /*
5893  * Drop the lock on the device's power state.  See comment for
5894  * pm_lock_power_single() for special implementation considerations.
5895  *
5896  * Note: for use by ppm only.
5897  */
5898 void
5899 pm_unlock_power_single(dev_info_t *dip, int circ)
5900 {
5901 	lock_loan_t *cur;
5902 
5903 	/* optimization: mutex not needed to check empty list */
5904 	if (lock_loan_head.pmlk_next == NULL) {
5905 		ndi_devi_exit(dip, circ);
5906 		return;
5907 	}
5908 
5909 	mutex_enter(&pm_loan_lock);
5910 	/* see if our thread is registered as a lock borrower. */
5911 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5912 		if (cur->pmlk_borrower == curthread)
5913 			break;
5914 	mutex_exit(&pm_loan_lock);
5915 
5916 	if (cur == NULL || cur->pmlk_dip != dip)
5917 		/* we acquired the lock directly, so return it */
5918 		ndi_devi_exit(dip, circ);
5919 }
5920 
5921 /*
5922  * Try to take the lock for changing the power level of a component.
5923  *
5924  * Note: for use by ppm only.
5925  */
5926 int
5927 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5928 {
5929 	return (ndi_devi_tryenter(dip, circp));
5930 }
5931 
5932 #ifdef	DEBUG
5933 /*
5934  * The following are used only to print out data structures for debugging
5935  */
5936 void
5937 prdeps(char *msg)
5938 {
5939 
5940 	pm_pdr_t *rp;
5941 	int i;
5942 
5943 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5944 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5945 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5946 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5947 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5948 		    (void *)rp->pdr_next);
5949 		if (rp->pdr_kept_count != 0) {
5950 			pm_log("kept list = ");
5951 			i = 0;
5952 			while (i < rp->pdr_kept_count) {
5953 				pm_log("%s ", rp->pdr_kept_paths[i]);
5954 				i++;
5955 			}
5956 			pm_log("\n");
5957 		}
5958 	}
5959 }
5960 
5961 void
5962 pr_noinvol(char *hdr)
5963 {
5964 	pm_noinvol_t *ip;
5965 
5966 	pm_log("%s\n", hdr);
5967 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5968 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5969 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5970 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5971 	rw_exit(&pm_noinvol_rwlock);
5972 }
5973 #endif
5974 
5975 /*
5976  * Attempt to apply the thresholds indicated by rp to the node specified by
5977  * dip.
5978  */
5979 void
5980 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5981 {
5982 	PMD_FUNC(pmf, "apply_recorded_thresh")
5983 	int i, j;
5984 	int comps = PM_NUMCMPTS(dip);
5985 	struct pm_component *cp;
5986 	pm_pte_t *ep;
5987 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5988 
5989 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5990 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5991 	PM_LOCK_DIP(dip);
5992 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5993 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5994 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
5995 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5996 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5997 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5998 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5999 		PM_UNLOCK_DIP(dip);
6000 		return;
6001 	}
6002 
6003 	ep = rp->ptr_entries;
6004 	/*
6005 	 * Here we do the special case of a device threshold
6006 	 */
6007 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6008 		ASSERT(ep && ep->pte_numthresh == 1);
6009 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6010 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6011 		PM_UNLOCK_DIP(dip);
6012 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6013 		if (PM_SCANABLE(dip))
6014 			pm_rescan(dip);
6015 		return;
6016 	}
6017 	for (i = 0; i < comps; i++) {
6018 		cp = PM_CP(dip, i);
6019 		for (j = 0; j < ep->pte_numthresh; j++) {
6020 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6021 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6022 			    i, ep->pte_thresh[j]))
6023 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6024 		}
6025 		ep++;
6026 	}
6027 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6028 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6029 	PM_UNLOCK_DIP(dip);
6030 
6031 	if (PM_SCANABLE(dip))
6032 		pm_rescan(dip);
6033 }
6034 
6035 /*
6036  * Returns true if the threshold specified by rp could be applied to dip
6037  * (that is, the number of components and transitions are the same)
6038  */
6039 int
6040 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6041 {
6042 	PMD_FUNC(pmf, "valid_thresh")
6043 	int comps, i;
6044 	pm_component_t *cp;
6045 	pm_pte_t *ep;
6046 
6047 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6048 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6049 		    rp->ptr_physpath))
6050 		return (0);
6051 	}
6052 	/*
6053 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6054 	 * an entry with numcomps == 0, (since we don't know how many
6055 	 * components there are in advance).  This is always a valid
6056 	 * spec.
6057 	 */
6058 	if (rp->ptr_numcomps == 0) {
6059 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6060 		return (1);
6061 	}
6062 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6063 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6064 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6065 		return (0);
6066 	}
6067 	ep = rp->ptr_entries;
6068 	for (i = 0; i < comps; i++) {
6069 		cp = PM_CP(dip, i);
6070 		if ((ep + i)->pte_numthresh !=
6071 		    cp->pmc_comp.pmc_numlevels - 1) {
6072 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6073 			    pmf, rp->ptr_physpath, i,
6074 			    cp->pmc_comp.pmc_numlevels - 1,
6075 			    (ep + i)->pte_numthresh))
6076 			return (0);
6077 		}
6078 	}
6079 	return (1);
6080 }
6081 
6082 /*
6083  * Remove any recorded threshold for device physpath
6084  * We know there will be at most one.
6085  */
6086 void
6087 pm_unrecord_threshold(char *physpath)
6088 {
6089 	pm_thresh_rec_t *pptr, *ptr;
6090 
6091 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6092 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6093 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6094 			if (pptr) {
6095 				pptr->ptr_next = ptr->ptr_next;
6096 			} else {
6097 				ASSERT(pm_thresh_head == ptr);
6098 				pm_thresh_head = ptr->ptr_next;
6099 			}
6100 			kmem_free(ptr, ptr->ptr_size);
6101 			break;
6102 		}
6103 		pptr = ptr;
6104 	}
6105 	rw_exit(&pm_thresh_rwlock);
6106 }
6107 
6108 /*
6109  * Discard all recorded thresholds.  We are returning to the default pm state.
6110  */
6111 void
6112 pm_discard_thresholds(void)
6113 {
6114 	pm_thresh_rec_t *rp;
6115 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6116 	while (pm_thresh_head) {
6117 		rp = pm_thresh_head;
6118 		pm_thresh_head = rp->ptr_next;
6119 		kmem_free(rp, rp->ptr_size);
6120 	}
6121 	rw_exit(&pm_thresh_rwlock);
6122 }
6123 
6124 /*
6125  * Discard all recorded dependencies.  We are returning to the default pm state.
6126  */
6127 void
6128 pm_discard_dependencies(void)
6129 {
6130 	pm_pdr_t *rp;
6131 	int i;
6132 	size_t length;
6133 
6134 #ifdef DEBUG
6135 	if (pm_debug & PMD_DEP)
6136 		prdeps("Before discard\n");
6137 #endif
6138 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6139 
6140 #ifdef DEBUG
6141 	if (pm_debug & PMD_DEP)
6142 		prdeps("After discard\n");
6143 #endif
6144 	while (pm_dep_head) {
6145 		rp = pm_dep_head;
6146 		if (!rp->pdr_isprop) {
6147 			ASSERT(rp->pdr_satisfied == 0);
6148 			ASSERT(pm_unresolved_deps);
6149 			pm_unresolved_deps--;
6150 		} else {
6151 			ASSERT(pm_prop_deps);
6152 			pm_prop_deps--;
6153 		}
6154 		pm_dep_head = rp->pdr_next;
6155 		if (rp->pdr_kept_count)  {
6156 			for (i = 0; i < rp->pdr_kept_count; i++) {
6157 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6158 				kmem_free(rp->pdr_kept_paths[i], length);
6159 			}
6160 			kmem_free(rp->pdr_kept_paths,
6161 			    rp->pdr_kept_count * sizeof (char **));
6162 		}
6163 		kmem_free(rp, rp->pdr_size);
6164 	}
6165 }
6166 
6167 
6168 static int
6169 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6170 {
6171 	_NOTE(ARGUNUSED(arg))
6172 	char *pathbuf;
6173 
6174 	if (PM_GET_PM_INFO(dip) == NULL)
6175 		return (DDI_WALK_CONTINUE);
6176 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6177 	(void) ddi_pathname(dip, pathbuf);
6178 	pm_free_keeper(pathbuf, 0);
6179 	kmem_free(pathbuf, MAXPATHLEN);
6180 	return (DDI_WALK_CONTINUE);
6181 }
6182 
6183 static int
6184 pm_kept_walk(dev_info_t *dip, void *arg)
6185 {
6186 	_NOTE(ARGUNUSED(arg))
6187 	char *pathbuf;
6188 
6189 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6190 	(void) ddi_pathname(dip, pathbuf);
6191 	(void) pm_kept(pathbuf);
6192 	kmem_free(pathbuf, MAXPATHLEN);
6193 
6194 	return (DDI_WALK_CONTINUE);
6195 }
6196 
6197 static int
6198 pm_keeper_walk(dev_info_t *dip, void *arg)
6199 {
6200 	_NOTE(ARGUNUSED(arg))
6201 	char *pathbuf;
6202 
6203 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6204 	(void) ddi_pathname(dip, pathbuf);
6205 	(void) pm_keeper(pathbuf);
6206 	kmem_free(pathbuf, MAXPATHLEN);
6207 
6208 	return (DDI_WALK_CONTINUE);
6209 }
6210 
6211 static char *
6212 pdw_type_decode(int type)
6213 {
6214 	switch (type) {
6215 	case PM_DEP_WK_POWER_ON:
6216 		return ("power on");
6217 	case PM_DEP_WK_POWER_OFF:
6218 		return ("power off");
6219 	case PM_DEP_WK_DETACH:
6220 		return ("detach");
6221 	case PM_DEP_WK_REMOVE_DEP:
6222 		return ("remove dep");
6223 	case PM_DEP_WK_BRINGUP_SELF:
6224 		return ("bringup self");
6225 	case PM_DEP_WK_RECORD_KEEPER:
6226 		return ("add dependent");
6227 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6228 		return ("add dependent property");
6229 	case PM_DEP_WK_KEPT:
6230 		return ("kept");
6231 	case PM_DEP_WK_KEEPER:
6232 		return ("keeper");
6233 	case PM_DEP_WK_ATTACH:
6234 		return ("attach");
6235 	case PM_DEP_WK_CHECK_KEPT:
6236 		return ("check kept");
6237 	case PM_DEP_WK_CPR_SUSPEND:
6238 		return ("suspend");
6239 	case PM_DEP_WK_CPR_RESUME:
6240 		return ("resume");
6241 	default:
6242 		return ("unknown");
6243 	}
6244 
6245 }
6246 
6247 static void
6248 pm_rele_dep(char *keeper)
6249 {
6250 	PMD_FUNC(pmf, "rele_dep")
6251 	pm_pdr_t *dp;
6252 	char *kept_path = NULL;
6253 	dev_info_t *kept = NULL;
6254 	int count = 0;
6255 
6256 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6257 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6258 			continue;
6259 		for (count = 0; count < dp->pdr_kept_count; count++) {
6260 			kept_path = dp->pdr_kept_paths[count];
6261 			if (kept_path == NULL)
6262 				continue;
6263 			kept = pm_name_to_dip(kept_path, 1);
6264 			if (kept) {
6265 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6266 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6267 				    keeper))
6268 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6269 				pm_rele_power(kept);
6270 				ddi_release_devi(kept);
6271 			}
6272 		}
6273 	}
6274 }
6275 
6276 /*
6277  * Called when we are just released from direct PM.  Bring ourself up
6278  * if our keeper is up since dependency is not honored while a kept
6279  * device is under direct PM.
6280  */
6281 static void
6282 pm_bring_self_up(char *keptpath)
6283 {
6284 	PMD_FUNC(pmf, "bring_self_up")
6285 	dev_info_t *kept;
6286 	dev_info_t *keeper;
6287 	pm_pdr_t *dp;
6288 	int i, j;
6289 	int up = 0, circ;
6290 
6291 	kept = pm_name_to_dip(keptpath, 1);
6292 	if (kept == NULL)
6293 		return;
6294 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6295 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6296 		if (dp->pdr_kept_count == 0)
6297 			continue;
6298 		for (i = 0; i < dp->pdr_kept_count; i++) {
6299 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6300 				continue;
6301 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6302 			if (keeper) {
6303 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6304 				    pmf, PM_DEVICE(keeper)))
6305 				PM_LOCK_POWER(keeper, &circ);
6306 				for (j = 0; j < PM_NUMCMPTS(keeper);
6307 				    j++) {
6308 					if (PM_CURPOWER(keeper, j)) {
6309 						PMD(PMD_KEEPS, ("%s: comp="
6310 						    "%d is up\n", pmf, j))
6311 						up++;
6312 					}
6313 				}
6314 				if (up) {
6315 					if (PM_SKBU(kept))
6316 						DEVI(kept)->devi_pm_flags &=
6317 						    ~PMC_SKIP_BRINGUP;
6318 					bring_pmdep_up(kept, 1);
6319 				}
6320 				PM_UNLOCK_POWER(keeper, circ);
6321 				ddi_release_devi(keeper);
6322 			}
6323 		}
6324 	}
6325 	ddi_release_devi(kept);
6326 }
6327 
6328 static void
6329 pm_process_dep_request(pm_dep_wk_t *work)
6330 {
6331 	PMD_FUNC(pmf, "dep_req")
6332 	int ret;
6333 
6334 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6335 	    pdw_type_decode(work->pdw_type)))
6336 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6337 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6338 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6339 
6340 	switch (work->pdw_type) {
6341 	case PM_DEP_WK_POWER_ON:
6342 		/* Bring up the kept devices and put a hold on them */
6343 		bring_wekeeps_up(work->pdw_keeper);
6344 		break;
6345 	case PM_DEP_WK_POWER_OFF:
6346 		/* Release the kept devices */
6347 		pm_rele_dep(work->pdw_keeper);
6348 		break;
6349 	case PM_DEP_WK_DETACH:
6350 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6351 		break;
6352 	case PM_DEP_WK_REMOVE_DEP:
6353 		pm_discard_dependencies();
6354 		break;
6355 	case PM_DEP_WK_BRINGUP_SELF:
6356 		/*
6357 		 * We deferred satisfying our dependency till now, so satisfy
6358 		 * it again and bring ourselves up.
6359 		 */
6360 		pm_bring_self_up(work->pdw_kept);
6361 		break;
6362 	case PM_DEP_WK_RECORD_KEEPER:
6363 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6364 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6365 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6366 		break;
6367 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6368 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6369 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6370 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6371 		break;
6372 	case PM_DEP_WK_KEPT:
6373 		ret = pm_kept(work->pdw_kept);
6374 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6375 		    ret))
6376 		break;
6377 	case PM_DEP_WK_KEEPER:
6378 		ret = pm_keeper(work->pdw_keeper);
6379 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6380 		    pmf, ret))
6381 		break;
6382 	case PM_DEP_WK_ATTACH:
6383 		ret = pm_keeper(work->pdw_keeper);
6384 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6385 		    pmf, ret))
6386 		ret = pm_kept(work->pdw_kept);
6387 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6388 		    pmf, ret))
6389 		break;
6390 	case PM_DEP_WK_CHECK_KEPT:
6391 		ret = pm_is_kept(work->pdw_kept);
6392 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6393 		    pmf, work->pdw_kept, ret))
6394 		break;
6395 	case PM_DEP_WK_CPR_SUSPEND:
6396 		pm_discard_dependencies();
6397 		break;
6398 	case PM_DEP_WK_CPR_RESUME:
6399 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6400 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6401 		break;
6402 	default:
6403 		ASSERT(0);
6404 		break;
6405 	}
6406 	/*
6407 	 * Free the work structure if the requester is not waiting
6408 	 * Otherwise it is the requester's responsiblity to free it.
6409 	 */
6410 	if (!work->pdw_wait) {
6411 		if (work->pdw_keeper)
6412 			kmem_free(work->pdw_keeper,
6413 			    strlen(work->pdw_keeper) + 1);
6414 		if (work->pdw_kept)
6415 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6416 		kmem_free(work, sizeof (pm_dep_wk_t));
6417 	} else {
6418 		/*
6419 		 * Notify requester if it is waiting for it.
6420 		 */
6421 		work->pdw_ret = ret;
6422 		work->pdw_done = 1;
6423 		cv_signal(&work->pdw_cv);
6424 	}
6425 }
6426 
6427 /*
6428  * Process PM dependency requests.
6429  */
6430 static void
6431 pm_dep_thread(void)
6432 {
6433 	pm_dep_wk_t *work;
6434 	callb_cpr_t cprinfo;
6435 
6436 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6437 	    "pm_dep_thread");
6438 	for (;;) {
6439 		mutex_enter(&pm_dep_thread_lock);
6440 		if (pm_dep_thread_workq == NULL) {
6441 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6442 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6443 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6444 		}
6445 		work = pm_dep_thread_workq;
6446 		pm_dep_thread_workq = work->pdw_next;
6447 		if (pm_dep_thread_tail == work)
6448 			pm_dep_thread_tail = work->pdw_next;
6449 		mutex_exit(&pm_dep_thread_lock);
6450 		pm_process_dep_request(work);
6451 
6452 	}
6453 	/*NOTREACHED*/
6454 }
6455 
6456 /*
6457  * Set the power level of the indicated device to unknown (if it is not a
6458  * backwards compatible device), as it has just been resumed, and it won't
6459  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6460  */
6461 void
6462 pm_forget_power_level(dev_info_t *dip)
6463 {
6464 	dev_info_t *pdip = ddi_get_parent(dip);
6465 	int i, count = 0;
6466 
6467 	if (!PM_ISBC(dip)) {
6468 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6469 			count += (PM_CURPOWER(dip, i) == 0);
6470 
6471 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6472 			e_pm_hold_rele_power(pdip, count);
6473 
6474 		/*
6475 		 * Count this as a power cycle if we care
6476 		 */
6477 		if (DEVI(dip)->devi_pm_volpmd &&
6478 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6479 			DEVI(dip)->devi_pm_volpmd = 0;
6480 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6481 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6482 	}
6483 }
6484 
6485 /*
6486  * This function advises the caller whether it should make a power-off
6487  * transition at this time or not.  If the transition is not advised
6488  * at this time, the time that the next power-off transition can
6489  * be made from now is returned through "intervalp" pointer.
6490  * This function returns:
6491  *
6492  *  1  power-off advised
6493  *  0  power-off not advised, intervalp will point to seconds from
6494  *	  now that a power-off is advised.  If it is passed the number
6495  *	  of years that policy specifies the device should last,
6496  *	  a large number is returned as the time interval.
6497  *  -1  error
6498  */
6499 int
6500 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6501 {
6502 	PMD_FUNC(pmf, "pm_trans_check")
6503 	char dbuf[DC_SCSI_MFR_LEN];
6504 	struct pm_scsi_cycles *scp;
6505 	int service_years, service_weeks, full_years;
6506 	time_t now, service_seconds, tdiff;
6507 	time_t within_year, when_allowed;
6508 	char *ptr;
6509 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6510 	int cycles_diff, cycles_over;
6511 
6512 	if (datap == NULL) {
6513 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6514 		return (-1);
6515 	}
6516 
6517 	if (datap->format == DC_SCSI_FORMAT) {
6518 		/*
6519 		 * Power cycles of the scsi drives are distributed
6520 		 * over 5 years with the following percentage ratio:
6521 		 *
6522 		 *	30%, 25%, 20%, 15%, and 10%
6523 		 *
6524 		 * The power cycle quota for each year is distributed
6525 		 * linearly through out the year.  The equation for
6526 		 * determining the expected cycles is:
6527 		 *
6528 		 *	e = a * (n / y)
6529 		 *
6530 		 * e = expected cycles
6531 		 * a = allocated cycles for this year
6532 		 * n = number of seconds since beginning of this year
6533 		 * y = number of seconds in a year
6534 		 *
6535 		 * Note that beginning of the year starts the day that
6536 		 * the drive has been put on service.
6537 		 *
6538 		 * If the drive has passed its expected cycles, we
6539 		 * can determine when it can start to power cycle
6540 		 * again to keep it on track to meet the 5-year
6541 		 * life expectancy.  The equation for determining
6542 		 * when to power cycle is:
6543 		 *
6544 		 *	w = y * (c / a)
6545 		 *
6546 		 * w = when it can power cycle again
6547 		 * y = number of seconds in a year
6548 		 * c = current number of cycles
6549 		 * a = allocated cycles for the year
6550 		 *
6551 		 */
6552 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6553 
6554 		scp = &datap->un.scsi_cycles;
6555 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6556 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6557 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6558 		if (scp->ncycles < 0 || scp->flag != 0) {
6559 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6560 			return (-1);
6561 		}
6562 
6563 		if (scp->ncycles > scp->lifemax) {
6564 			*intervalp = (LONG_MAX / hz);
6565 			return (0);
6566 		}
6567 
6568 		/*
6569 		 * convert service date to time_t
6570 		 */
6571 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6572 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6573 		ptr = dbuf;
6574 		service_years = stoi(&ptr) - EPOCH_YEAR;
6575 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6576 		    DC_SCSI_WEEK_LEN);
6577 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6578 
6579 		/*
6580 		 * scsi standard does not specify WW data,
6581 		 * could be (00-51) or (01-52)
6582 		 */
6583 		ptr = dbuf;
6584 		service_weeks = stoi(&ptr);
6585 		if (service_years < 0 ||
6586 		    service_weeks < 0 || service_weeks > 52) {
6587 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6588 			    pmf, service_years, service_weeks))
6589 			return (-1);
6590 		}
6591 
6592 		/*
6593 		 * calculate service date in seconds-since-epoch,
6594 		 * adding one day for each leap-year.
6595 		 *
6596 		 * (years-since-epoch + 2) fixes integer truncation,
6597 		 * example: (8) leap-years during [1972, 2000]
6598 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6599 		 */
6600 		service_seconds = (service_years * DC_SPY) +
6601 		    (service_weeks * DC_SPW) +
6602 		    (((service_years + 2) / 4) * DC_SPD);
6603 
6604 		now = gethrestime_sec();
6605 		/*
6606 		 * since the granularity of 'svc_date' is day not second,
6607 		 * 'now' should be rounded up to full day.
6608 		 */
6609 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6610 		if (service_seconds > now) {
6611 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6612 			    "than now (%ld)!\n", pmf, service_seconds, now))
6613 			return (-1);
6614 		}
6615 
6616 		tdiff = now - service_seconds;
6617 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6618 
6619 		/*
6620 		 * NOTE - Leap years are not considered in the calculations
6621 		 * below.
6622 		 */
6623 		full_years = (tdiff / DC_SPY);
6624 		if ((full_years >= DC_SCSI_NPY) &&
6625 		    (scp->ncycles <= scp->lifemax))
6626 			return (1);
6627 
6628 		/*
6629 		 * Determine what is the normal cycle usage for the
6630 		 * device at the beginning and the end of this year.
6631 		 */
6632 		lower_bound_cycles = (!full_years) ? 0 :
6633 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6634 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6635 
6636 		if (scp->ncycles <= lower_bound_cycles)
6637 			return (1);
6638 
6639 		/*
6640 		 * The linear slope that determines how many cycles
6641 		 * are allowed this year is number of seconds
6642 		 * passed this year over total number of seconds in a year.
6643 		 */
6644 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6645 		within_year = (tdiff % DC_SPY);
6646 		cycles_allowed = lower_bound_cycles +
6647 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6648 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6649 		    full_years, within_year))
6650 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6651 		    cycles_allowed))
6652 
6653 		if (scp->ncycles <= cycles_allowed)
6654 			return (1);
6655 
6656 		/*
6657 		 * The transition is not advised now but we can
6658 		 * determine when the next transition can be made.
6659 		 *
6660 		 * Depending on how many cycles the device has been
6661 		 * over-used, we may need to skip years with
6662 		 * different percentage quota in order to determine
6663 		 * when the next transition can be made.
6664 		 */
6665 		cycles_over = (scp->ncycles - lower_bound_cycles);
6666 		while (cycles_over > cycles_diff) {
6667 			full_years++;
6668 			if (full_years >= DC_SCSI_NPY) {
6669 				*intervalp = (LONG_MAX / hz);
6670 				return (0);
6671 			}
6672 			cycles_over -= cycles_diff;
6673 			lower_bound_cycles = upper_bound_cycles;
6674 			upper_bound_cycles =
6675 			    (scp->lifemax * pcnt[full_years]) / 100;
6676 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6677 		}
6678 
6679 		/*
6680 		 * The linear slope that determines when the next transition
6681 		 * can be made is the relative position of used cycles within a
6682 		 * year over total number of cycles within that year.
6683 		 */
6684 		when_allowed = service_seconds + (full_years * DC_SPY) +
6685 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6686 		*intervalp = (when_allowed - now);
6687 		if (*intervalp > (LONG_MAX / hz))
6688 			*intervalp = (LONG_MAX / hz);
6689 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6690 		    *intervalp))
6691 		return (0);
6692 	}
6693 
6694 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6695 	return (-1);
6696 }
6697 
6698 /*
6699  * Nexus drivers call into pm framework to indicate which child driver is about
6700  * to be installed.  In some platforms, ppm may need to configure the hardware
6701  * for successful installation of a driver.
6702  */
6703 int
6704 pm_init_child(dev_info_t *dip)
6705 {
6706 	power_req_t power_req;
6707 
6708 	ASSERT(ddi_binding_name(dip));
6709 	ASSERT(ddi_get_name_addr(dip));
6710 	pm_ppm_claim(dip);
6711 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6712 		power_req.request_type = PMR_PPM_INIT_CHILD;
6713 		power_req.req.ppm_config_req.who = dip;
6714 		ASSERT(PPM(dip) != NULL);
6715 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6716 		    NULL));
6717 	} else {
6718 #ifdef DEBUG
6719 		/* pass it to the default handler so we can debug things */
6720 		power_req.request_type = PMR_PPM_INIT_CHILD;
6721 		power_req.req.ppm_config_req.who = dip;
6722 		(void) pm_ctlops(NULL, dip,
6723 		    DDI_CTLOPS_POWER, &power_req, NULL);
6724 #endif
6725 	}
6726 	return (DDI_SUCCESS);
6727 }
6728 
6729 /*
6730  * Bring parent of a node that is about to be probed up to full power, and
6731  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6732  * it is time to let it go down again
6733  */
6734 void
6735 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6736 {
6737 	int result;
6738 	power_req_t power_req;
6739 
6740 	bzero(cp, sizeof (*cp));
6741 	cp->ppc_dip = dip;
6742 
6743 	pm_ppm_claim(dip);
6744 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6745 		power_req.request_type = PMR_PPM_PRE_PROBE;
6746 		power_req.req.ppm_config_req.who = dip;
6747 		ASSERT(PPM(dip) != NULL);
6748 		(void) pm_ctlops(PPM(dip), dip,
6749 		    DDI_CTLOPS_POWER, &power_req, &result);
6750 		cp->ppc_ppm = PPM(dip);
6751 	} else {
6752 #ifdef DEBUG
6753 		/* pass it to the default handler so we can debug things */
6754 		power_req.request_type = PMR_PPM_PRE_PROBE;
6755 		power_req.req.ppm_config_req.who = dip;
6756 		(void) pm_ctlops(NULL, dip,
6757 		    DDI_CTLOPS_POWER, &power_req, &result);
6758 #endif
6759 		cp->ppc_ppm = NULL;
6760 	}
6761 }
6762 
6763 int
6764 pm_pre_config(dev_info_t *dip, char *devnm)
6765 {
6766 	PMD_FUNC(pmf, "pre_config")
6767 	int ret;
6768 
6769 	if (MDI_VHCI(dip)) {
6770 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6771 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6772 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6773 	} else if (!PM_GET_PM_INFO(dip))
6774 		return (DDI_SUCCESS);
6775 
6776 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6777 	pm_hold_power(dip);
6778 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6779 	if (ret != DDI_SUCCESS)
6780 		pm_rele_power(dip);
6781 	return (ret);
6782 }
6783 
6784 /*
6785  * This routine is called by devfs during its walk to unconfigue a node.
6786  * If the call is due to auto mod_unloads and the dip is not at its
6787  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6788  * return DDI_SUCCESS.
6789  */
6790 int
6791 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6792 {
6793 	PMD_FUNC(pmf, "pre_unconfig")
6794 	int ret;
6795 
6796 	if (MDI_VHCI(dip)) {
6797 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6798 		    PM_DEVICE(dip), flags))
6799 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6800 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6801 	} else if (!PM_GET_PM_INFO(dip))
6802 		return (DDI_SUCCESS);
6803 
6804 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6805 	    flags))
6806 	*held = 0;
6807 
6808 	/*
6809 	 * If the dip is a leaf node, don't power it up.
6810 	 */
6811 	if (!ddi_get_child(dip))
6812 		return (DDI_SUCCESS);
6813 
6814 	/*
6815 	 * Do not power up the node if it is called due to auto-modunload.
6816 	 */
6817 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6818 		return (DDI_FAILURE);
6819 
6820 	pm_hold_power(dip);
6821 	*held = 1;
6822 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6823 	if (ret != DDI_SUCCESS) {
6824 		pm_rele_power(dip);
6825 		*held = 0;
6826 	}
6827 	return (ret);
6828 }
6829 
6830 /*
6831  * Notify ppm of attach action.  Parent is already held at full power by
6832  * probe action.
6833  */
6834 void
6835 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6836 {
6837 	static char *me = "pm_pre_attach";
6838 	power_req_t power_req;
6839 	int result;
6840 
6841 	/*
6842 	 * Initialize and fill in the PPM cookie
6843 	 */
6844 	bzero(cp, sizeof (*cp));
6845 	cp->ppc_cmd = (int)cmd;
6846 	cp->ppc_ppm = PPM(dip);
6847 	cp->ppc_dip = dip;
6848 
6849 	/*
6850 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6851 	 * Power Management stuff. DDI_RESUME also has to purge it's
6852 	 * powerlevel information.
6853 	 */
6854 	switch (cmd) {
6855 	case DDI_ATTACH:
6856 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6857 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6858 			power_req.req.ppm_config_req.who = dip;
6859 			ASSERT(PPM(dip));
6860 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6861 			    &power_req, &result);
6862 		}
6863 #ifdef DEBUG
6864 		else {
6865 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6866 			power_req.req.ppm_config_req.who = dip;
6867 			(void) pm_ctlops(NULL, dip,
6868 			    DDI_CTLOPS_POWER, &power_req, &result);
6869 		}
6870 #endif
6871 		break;
6872 	case DDI_RESUME:
6873 		pm_forget_power_level(dip);
6874 
6875 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6876 			power_req.request_type = PMR_PPM_PRE_RESUME;
6877 			power_req.req.resume_req.who = cp->ppc_dip;
6878 			power_req.req.resume_req.cmd =
6879 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6880 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6881 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6882 			    DDI_CTLOPS_POWER, &power_req, &result);
6883 		}
6884 #ifdef DEBUG
6885 		else {
6886 			power_req.request_type = PMR_PPM_PRE_RESUME;
6887 			power_req.req.resume_req.who = cp->ppc_dip;
6888 			power_req.req.resume_req.cmd =
6889 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6890 			(void) pm_ctlops(NULL, cp->ppc_dip,
6891 			    DDI_CTLOPS_POWER, &power_req, &result);
6892 		}
6893 #endif
6894 		break;
6895 
6896 	case DDI_PM_RESUME:
6897 		break;
6898 
6899 	default:
6900 		panic(me);
6901 	}
6902 }
6903 
6904 /*
6905  * Nexus drivers call into pm framework to indicate which child driver is
6906  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6907  * hardware since the device driver is no longer installed.
6908  */
6909 int
6910 pm_uninit_child(dev_info_t *dip)
6911 {
6912 	power_req_t power_req;
6913 
6914 	ASSERT(ddi_binding_name(dip));
6915 	ASSERT(ddi_get_name_addr(dip));
6916 	pm_ppm_claim(dip);
6917 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6918 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6919 		power_req.req.ppm_config_req.who = dip;
6920 		ASSERT(PPM(dip));
6921 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6922 		    NULL));
6923 	} else {
6924 #ifdef DEBUG
6925 		/* pass it to the default handler so we can debug things */
6926 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6927 		power_req.req.ppm_config_req.who = dip;
6928 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6929 #endif
6930 	}
6931 	return (DDI_SUCCESS);
6932 }
6933 /*
6934  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6935  * Also notify ppm of result of probe if there is a ppm that cares
6936  */
6937 void
6938 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6939 {
6940 	_NOTE(ARGUNUSED(probe_failed))
6941 	int result;
6942 	power_req_t power_req;
6943 
6944 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6945 		power_req.request_type = PMR_PPM_POST_PROBE;
6946 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6947 		power_req.req.ppm_config_req.result = ret;
6948 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6949 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6950 		    &power_req, &result);
6951 	}
6952 #ifdef DEBUG
6953 	else {
6954 		power_req.request_type = PMR_PPM_POST_PROBE;
6955 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6956 		power_req.req.ppm_config_req.result = ret;
6957 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6958 		    &power_req, &result);
6959 	}
6960 #endif
6961 }
6962 
6963 void
6964 pm_post_config(dev_info_t *dip, char *devnm)
6965 {
6966 	PMD_FUNC(pmf, "post_config")
6967 
6968 	if (MDI_VHCI(dip)) {
6969 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6970 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6971 		return;
6972 	} else if (!PM_GET_PM_INFO(dip))
6973 		return;
6974 
6975 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6976 	pm_rele_power(dip);
6977 }
6978 
6979 void
6980 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6981 {
6982 	PMD_FUNC(pmf, "post_unconfig")
6983 
6984 	if (MDI_VHCI(dip)) {
6985 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6986 		    PM_DEVICE(dip), held))
6987 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6988 		return;
6989 	} else if (!PM_GET_PM_INFO(dip))
6990 		return;
6991 
6992 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6993 	    held))
6994 	if (!held)
6995 		return;
6996 	/*
6997 	 * We have held power in pre_unconfig, release it here.
6998 	 */
6999 	pm_rele_power(dip);
7000 }
7001 
7002 /*
7003  * Notify ppm of result of attach if there is a ppm that cares
7004  */
7005 void
7006 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7007 {
7008 	int result;
7009 	power_req_t power_req;
7010 	dev_info_t	*dip;
7011 
7012 	if (cp->ppc_cmd != DDI_ATTACH)
7013 		return;
7014 
7015 	dip = cp->ppc_dip;
7016 
7017 	if (ret == DDI_SUCCESS) {
7018 		/*
7019 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7020 		 */
7021 		if (PM_GET_PM_INFO(dip) == NULL)
7022 			(void) pm_start(dip);
7023 	} else {
7024 		/*
7025 		 * Attach may have got pm started before failing
7026 		 */
7027 		pm_stop(dip);
7028 	}
7029 
7030 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7031 		power_req.request_type = PMR_PPM_POST_ATTACH;
7032 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7033 		power_req.req.ppm_config_req.result = ret;
7034 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7035 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7036 		    DDI_CTLOPS_POWER, &power_req, &result);
7037 	}
7038 #ifdef DEBUG
7039 	else {
7040 		power_req.request_type = PMR_PPM_POST_ATTACH;
7041 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7042 		power_req.req.ppm_config_req.result = ret;
7043 		(void) pm_ctlops(NULL, cp->ppc_dip,
7044 		    DDI_CTLOPS_POWER, &power_req, &result);
7045 	}
7046 #endif
7047 }
7048 
7049 /*
7050  * Notify ppm of attach action.  Parent is already held at full power by
7051  * probe action.
7052  */
7053 void
7054 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7055 {
7056 	int result;
7057 	power_req_t power_req;
7058 
7059 	bzero(cp, sizeof (*cp));
7060 	cp->ppc_dip = dip;
7061 	cp->ppc_cmd = (int)cmd;
7062 
7063 	switch (cmd) {
7064 	case DDI_DETACH:
7065 		pm_detaching(dip);		/* suspend pm while detaching */
7066 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7067 			power_req.request_type = PMR_PPM_PRE_DETACH;
7068 			power_req.req.ppm_config_req.who = dip;
7069 			ASSERT(PPM(dip));
7070 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7071 			    &power_req, &result);
7072 			cp->ppc_ppm = PPM(dip);
7073 		} else {
7074 #ifdef DEBUG
7075 			/* pass to the default handler so we can debug things */
7076 			power_req.request_type = PMR_PPM_PRE_DETACH;
7077 			power_req.req.ppm_config_req.who = dip;
7078 			(void) pm_ctlops(NULL, dip,
7079 			    DDI_CTLOPS_POWER, &power_req, &result);
7080 #endif
7081 			cp->ppc_ppm = NULL;
7082 		}
7083 		break;
7084 
7085 	default:
7086 		break;
7087 	}
7088 }
7089 
7090 /*
7091  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7092  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7093  * make an entry to record the details, which includes certain flag settings.
7094  */
7095 static void
7096 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7097     int wasvolpmd, major_t major)
7098 {
7099 	PMD_FUNC(pmf, "record_invol_path")
7100 	major_t pm_path_to_major(char *);
7101 	size_t plen;
7102 	pm_noinvol_t *ip, *np, *pp;
7103 	pp = NULL;
7104 
7105 	plen = strlen(path) + 1;
7106 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7107 	np->ni_size = plen;
7108 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7109 	np->ni_noinvolpm = noinvolpm;
7110 	np->ni_volpmd = volpmd;
7111 	np->ni_wasvolpmd = wasvolpmd;
7112 	np->ni_flags = flags;
7113 	(void) strcpy(np->ni_path, path);
7114 	/*
7115 	 * If we haven't actually seen the node attached, it is hard to figure
7116 	 * out its major.  If we could hold the node by path, we would be much
7117 	 * happier here.
7118 	 */
7119 	if (major == (major_t)-1) {
7120 		np->ni_major = pm_path_to_major(path);
7121 	} else {
7122 		np->ni_major = major;
7123 	}
7124 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7125 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7126 		int comp = strcmp(path, ip->ni_path);
7127 		if (comp < 0) {
7128 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7129 			    pmf, path, ip->ni_path))
7130 			/* insert before current entry */
7131 			np->ni_next = ip;
7132 			if (pp) {
7133 				pp->ni_next = np;
7134 			} else {
7135 				pm_noinvol_head = np;
7136 			}
7137 			rw_exit(&pm_noinvol_rwlock);
7138 #ifdef DEBUG
7139 			if (pm_debug & PMD_NOINVOL)
7140 				pr_noinvol("record_invol_path exit0");
7141 #endif
7142 			return;
7143 		} else if (comp == 0) {
7144 			panic("%s already in pm_noinvol list", path);
7145 		}
7146 	}
7147 	/*
7148 	 * If we did not find an entry in the list that this should go before,
7149 	 * then it must go at the end
7150 	 */
7151 	if (pp) {
7152 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7153 		    pp->ni_path))
7154 		ASSERT(pp->ni_next == 0);
7155 		pp->ni_next = np;
7156 	} else {
7157 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7158 		ASSERT(!pm_noinvol_head);
7159 		pm_noinvol_head = np;
7160 	}
7161 	rw_exit(&pm_noinvol_rwlock);
7162 #ifdef DEBUG
7163 	if (pm_debug & PMD_NOINVOL)
7164 		pr_noinvol("record_invol_path exit");
7165 #endif
7166 }
7167 
7168 void
7169 pm_record_invol(dev_info_t *dip)
7170 {
7171 	char *pathbuf;
7172 	int pm_all_components_off(dev_info_t *);
7173 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7174 
7175 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7176 	(void) ddi_pathname(dip, pathbuf);
7177 
7178 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7179 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7180 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7181 
7182 	/*
7183 	 * If this child's detach will be holding up its ancestors, then we
7184 	 * allow for an exception to that if all children of this type have
7185 	 * gone down voluntarily.
7186 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7187 	 */
7188 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7189 	    dip);
7190 	kmem_free(pathbuf, MAXPATHLEN);
7191 }
7192 
7193 void
7194 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7195 {
7196 	dev_info_t *dip = cp->ppc_dip;
7197 	int result;
7198 	power_req_t power_req;
7199 
7200 	switch (cp->ppc_cmd) {
7201 	case DDI_DETACH:
7202 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7203 			power_req.request_type = PMR_PPM_POST_DETACH;
7204 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7205 			power_req.req.ppm_config_req.result = ret;
7206 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7207 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7208 			    DDI_CTLOPS_POWER, &power_req, &result);
7209 		}
7210 #ifdef DEBUG
7211 		else {
7212 			power_req.request_type = PMR_PPM_POST_DETACH;
7213 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7214 			power_req.req.ppm_config_req.result = ret;
7215 			(void) pm_ctlops(NULL, cp->ppc_dip,
7216 			    DDI_CTLOPS_POWER, &power_req, &result);
7217 		}
7218 #endif
7219 		if (ret == DDI_SUCCESS) {
7220 			/*
7221 			 * For hotplug detach we assume it is *really* gone
7222 			 */
7223 			if (cp->ppc_cmd == DDI_DETACH &&
7224 			    ((DEVI(dip)->devi_pm_flags &
7225 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7226 			    DEVI(dip)->devi_pm_noinvolpm))
7227 				pm_record_invol(dip);
7228 			DEVI(dip)->devi_pm_flags &=
7229 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7230 
7231 			/*
7232 			 * If console fb is detaching, then we don't need to
7233 			 * worry any more about it going off (pm_detaching has
7234 			 * brought up all components)
7235 			 */
7236 			if (PM_IS_CFB(dip)) {
7237 				mutex_enter(&pm_cfb_lock);
7238 				ASSERT(cfb_dip_detaching);
7239 				ASSERT(cfb_dip == NULL);
7240 				ASSERT(pm_cfb_comps_off == 0);
7241 				cfb_dip_detaching = NULL;
7242 				mutex_exit(&pm_cfb_lock);
7243 			}
7244 			pm_stop(dip);	/* make it permanent */
7245 		} else {
7246 			if (PM_IS_CFB(dip)) {
7247 				mutex_enter(&pm_cfb_lock);
7248 				ASSERT(cfb_dip_detaching);
7249 				ASSERT(cfb_dip == NULL);
7250 				ASSERT(pm_cfb_comps_off == 0);
7251 				cfb_dip = cfb_dip_detaching;
7252 				cfb_dip_detaching = NULL;
7253 				mutex_exit(&pm_cfb_lock);
7254 			}
7255 			pm_detach_failed(dip);	/* resume power management */
7256 		}
7257 		break;
7258 	case DDI_PM_SUSPEND:
7259 		break;
7260 	case DDI_SUSPEND:
7261 		break;				/* legal, but nothing to do */
7262 	default:
7263 #ifdef DEBUG
7264 		panic("pm_post_detach: unrecognized cmd %d for detach",
7265 		    cp->ppc_cmd);
7266 		/*NOTREACHED*/
7267 #else
7268 		break;
7269 #endif
7270 	}
7271 }
7272 
7273 /*
7274  * Called after vfs_mountroot has got the clock started to fix up timestamps
7275  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7276  * devices look busy but have a 0 busycnt
7277  */
7278 int
7279 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7280 {
7281 	_NOTE(ARGUNUSED(arg))
7282 
7283 	pm_info_t *info = PM_GET_PM_INFO(dip);
7284 	struct pm_component *cp;
7285 	int i;
7286 
7287 	if (!info)
7288 		return (DDI_WALK_CONTINUE);
7289 	PM_LOCK_BUSY(dip);
7290 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7291 		cp = PM_CP(dip, i);
7292 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7293 			cp->pmc_timestamp = gethrestime_sec();
7294 	}
7295 	PM_UNLOCK_BUSY(dip);
7296 	return (DDI_WALK_CONTINUE);
7297 }
7298 
7299 /*
7300  * Called at attach time to see if the device being attached has a record in
7301  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7302  * parents and set a flag in the dip
7303  */
7304 void
7305 pm_noinvol_specd(dev_info_t *dip)
7306 {
7307 	PMD_FUNC(pmf, "noinvol_specd")
7308 	char *pathbuf;
7309 	pm_noinvol_t *ip, *pp = NULL;
7310 	int wasvolpmd;
7311 	int found = 0;
7312 
7313 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7314 		return;
7315 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7316 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7317 	(void) ddi_pathname(dip, pathbuf);
7318 
7319 	PM_LOCK_DIP(dip);
7320 	DEVI(dip)->devi_pm_volpmd = 0;
7321 	DEVI(dip)->devi_pm_noinvolpm = 0;
7322 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7323 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7324 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7325 		    pmf, pathbuf, ip->ni_path))
7326 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7327 			found++;
7328 			break;
7329 		}
7330 	}
7331 	rw_exit(&pm_noinvol_rwlock);
7332 	if (!found) {
7333 		PM_UNLOCK_DIP(dip);
7334 		kmem_free(pathbuf, MAXPATHLEN);
7335 		return;
7336 	}
7337 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7338 	pp = NULL;
7339 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7340 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7341 		    pmf, pathbuf, ip->ni_path))
7342 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7343 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7344 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7345 			/*
7346 			 * Handle special case of console fb
7347 			 */
7348 			if (PM_IS_CFB(dip)) {
7349 				mutex_enter(&pm_cfb_lock);
7350 				cfb_dip = dip;
7351 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7352 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7353 				mutex_exit(&pm_cfb_lock);
7354 			}
7355 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7356 			ASSERT((DEVI(dip)->devi_pm_flags &
7357 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7358 			    DEVI(dip)->devi_pm_noinvolpm);
7359 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7360 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7361 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7362 			    ip->ni_noinvolpm, ip->ni_volpmd,
7363 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7364 			/*
7365 			 * free the entry in hopes the list will now be empty
7366 			 * and we won't have to search it any more until the
7367 			 * device detaches
7368 			 */
7369 			if (pp) {
7370 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7371 				    pmf, ip->ni_path, pp->ni_path))
7372 				pp->ni_next = ip->ni_next;
7373 			} else {
7374 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7375 				    pmf, ip->ni_path))
7376 				ASSERT(pm_noinvol_head == ip);
7377 				pm_noinvol_head = ip->ni_next;
7378 			}
7379 			PM_UNLOCK_DIP(dip);
7380 			wasvolpmd = ip->ni_wasvolpmd;
7381 			rw_exit(&pm_noinvol_rwlock);
7382 			kmem_free(ip->ni_path, ip->ni_size);
7383 			kmem_free(ip, sizeof (*ip));
7384 			/*
7385 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7386 			 * (and volpmd if appropriate)
7387 			 */
7388 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7389 			    wasvolpmd, pathbuf, dip);
7390 #ifdef DEBUG
7391 			if (pm_debug & PMD_NOINVOL)
7392 				pr_noinvol("noinvol_specd exit");
7393 #endif
7394 			kmem_free(pathbuf, MAXPATHLEN);
7395 			return;
7396 		}
7397 	}
7398 	kmem_free(pathbuf, MAXPATHLEN);
7399 	rw_exit(&pm_noinvol_rwlock);
7400 	PM_UNLOCK_DIP(dip);
7401 }
7402 
7403 int
7404 pm_all_components_off(dev_info_t *dip)
7405 {
7406 	int i;
7407 	pm_component_t *cp;
7408 
7409 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7410 		cp = PM_CP(dip, i);
7411 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7412 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7413 			return (0);
7414 	}
7415 	return (1);	/* all off */
7416 }
7417 
7418 /*
7419  * Make sure that all "no involuntary power cycles" devices are attached.
7420  * Called before doing a cpr suspend to make sure the driver has a say about
7421  * the power cycle
7422  */
7423 int
7424 pm_reattach_noinvol(void)
7425 {
7426 	PMD_FUNC(pmf, "reattach_noinvol")
7427 	pm_noinvol_t *ip;
7428 	char *path;
7429 	dev_info_t *dip;
7430 
7431 	/*
7432 	 * Prevent the modunload thread from unloading any modules until we
7433 	 * have completely stopped all kernel threads.
7434 	 */
7435 	modunload_disable();
7436 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7437 		/*
7438 		 * Forget we'v ever seen any entry
7439 		 */
7440 		ip->ni_persistent = 0;
7441 	}
7442 restart:
7443 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7444 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7445 #ifdef PMDDEBUG
7446 		major_t maj;
7447 		maj = ip->ni_major;
7448 #endif
7449 		path = ip->ni_path;
7450 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7451 			if (ip->ni_persistent) {
7452 				/*
7453 				 * If we weren't able to make this entry
7454 				 * go away, then we give up, as
7455 				 * holding/attaching the driver ought to have
7456 				 * resulted in this entry being deleted
7457 				 */
7458 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7459 				    "(%s|%d)\n", pmf, ip->ni_path,
7460 				    ddi_major_to_name(maj), (int)maj))
7461 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7462 				    ip->ni_path);
7463 				modunload_enable();
7464 				rw_exit(&pm_noinvol_rwlock);
7465 				return (0);
7466 			}
7467 			ip->ni_persistent++;
7468 			rw_exit(&pm_noinvol_rwlock);
7469 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7470 			dip = e_ddi_hold_devi_by_path(path, 0);
7471 			if (dip == NULL) {
7472 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7473 				    pmf, path, (int)maj))
7474 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7475 				    "driver", path);
7476 				modunload_enable();
7477 				return (0);
7478 			} else {
7479 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7480 				/*
7481 				 * Since the modunload thread is stopped, we
7482 				 * don't have to keep the driver held, which
7483 				 * saves a ton of bookkeeping
7484 				 */
7485 				ddi_release_devi(dip);
7486 				goto restart;
7487 			}
7488 		} else {
7489 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7490 			    pmf, ip->ni_path))
7491 			continue;
7492 		}
7493 	}
7494 	rw_exit(&pm_noinvol_rwlock);
7495 	return (1);
7496 }
7497 
/*
 * Counterpart of a successful pm_reattach_noinvol(): re-enable the
 * modunload thread that was disabled there.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7503 
7504 /*
7505  * Display pm support code
7506  */
7507 
7508 
7509 /*
7510  * console frame-buffer power-mgmt gets enabled when debugging
7511  * services are not present or console fbpm override is set
7512  */
7513 void
7514 pm_cfb_setup(const char *stdout_path)
7515 {
7516 	PMD_FUNC(pmf, "cfb_setup")
7517 	extern int obpdebug;
7518 	char *devname;
7519 	dev_info_t *dip;
7520 	int devname_len;
7521 	extern dev_info_t *fbdip;
7522 
7523 	/*
7524 	 * By virtue of this function being called (from consconfig),
7525 	 * we know stdout is a framebuffer.
7526 	 */
7527 	stdout_is_framebuffer = 1;
7528 
7529 	if (obpdebug || (boothowto & RB_DEBUG)) {
7530 		if (pm_cfb_override == 0) {
7531 			/*
7532 			 * Console is frame buffer, but we want to suppress
7533 			 * pm on it because of debugging setup
7534 			 */
7535 			pm_cfb_enabled = 0;
7536 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7537 			    "console power management.");
7538 			/*
7539 			 * however, we still need to know which is the console
7540 			 * fb in order to suppress pm on it
7541 			 */
7542 		} else {
7543 			cmn_err(CE_WARN, "Kernel debugger present: see "
7544 			    "kmdb(1M) for interaction with power management.");
7545 		}
7546 	}
7547 #ifdef DEBUG
7548 	/*
7549 	 * IF console is fb and is power managed, don't do prom_printfs from
7550 	 * pm debug macro
7551 	 */
7552 	if (pm_cfb_enabled) {
7553 		if (pm_debug)
7554 			prom_printf("pm debug output will be to log only\n");
7555 		pm_divertdebug++;
7556 	}
7557 #endif
7558 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7559 	devname_len = strlen(devname) + 1;
7560 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7561 	/* if the driver is attached */
7562 	if ((dip = fbdip) != NULL) {
7563 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7564 		    PM_DEVICE(dip)))
7565 		/*
7566 		 * We set up here as if the driver were power manageable in case
7567 		 * we get a later attach of a pm'able driver (which would result
7568 		 * in a panic later)
7569 		 */
7570 		cfb_dip = dip;
7571 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7572 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7573 		    PM_DEVICE(dip)))
7574 #ifdef DEBUG
7575 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7576 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7577 			    pmf, PM_DEVICE(dip)))
7578 		}
7579 #endif
7580 	} else {
7581 		char *ep;
7582 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7583 		pm_record_invol_path(devname,
7584 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7585 		    (major_t)-1);
7586 		for (ep = strrchr(devname, '/'); ep != devname;
7587 		    ep = strrchr(devname, '/')) {
7588 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7589 			*ep = '\0';
7590 			dip = pm_name_to_dip(devname, 0);
7591 			if (dip != NULL) {
7592 				/*
7593 				 * Walk up the tree incrementing
7594 				 * devi_pm_noinvolpm
7595 				 */
7596 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7597 				    0, 0, devname, dip);
7598 				break;
7599 			} else {
7600 				pm_record_invol_path(devname,
7601 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7602 			}
7603 		}
7604 	}
7605 	kmem_free(devname, devname_len);
7606 }
7607 
7608 void
7609 pm_cfb_rele(void)
7610 {
7611 	mutex_enter(&pm_cfb_lock);
7612 	/*
7613 	 * this call isn't using the console any  more, it is ok to take it
7614 	 * down if the count goes to 0
7615 	 */
7616 	cfb_inuse--;
7617 	mutex_exit(&pm_cfb_lock);
7618 }
7619 
7620 /*
7621  * software interrupt handler for fbpm; this function exists because we can't
7622  * bring up the frame buffer power from above lock level.  So if we need to,
7623  * we instead schedule a softint that runs this routine and takes us into
7624  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7625  */
7626 static uint_t
7627 pm_cfb_softint(caddr_t int_handler_arg)
7628 {
7629 	_NOTE(ARGUNUSED(int_handler_arg))
7630 	int rval = DDI_INTR_UNCLAIMED;
7631 
7632 	mutex_enter(&pm_cfb_lock);
7633 	if (pm_soft_pending) {
7634 		mutex_exit(&pm_cfb_lock);
7635 		debug_enter((char *)NULL);
7636 		/* acquired in debug_enter before calling pm_cfb_trigger */
7637 		pm_cfb_rele();
7638 		mutex_enter(&pm_cfb_lock);
7639 		pm_soft_pending = 0;
7640 		mutex_exit(&pm_cfb_lock);
7641 		rval = DDI_INTR_CLAIMED;
7642 	} else
7643 		mutex_exit(&pm_cfb_lock);
7644 
7645 	return (rval);
7646 }
7647 
7648 void
7649 pm_cfb_setup_intr(void)
7650 {
7651 	PMD_FUNC(pmf, "cfb_setup_intr")
7652 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7653 	void pm_cfb_check_and_powerup(void);
7654 
7655 	if (!stdout_is_framebuffer) {
7656 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7657 		return;
7658 	}
7659 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7660 #ifdef DEBUG
7661 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7662 #endif
7663 	/*
7664 	 * setup software interrupt handler
7665 	 */
7666 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7667 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7668 		panic("pm: unable to register soft intr.");
7669 
7670 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7671 }
7672 
7673 /*
7674  * Checks to see if it is safe to write to the console wrt power management
7675  * (i.e. if the console is a framebuffer, then it must be at full power)
7676  * returns 1 when power is off (power-up is needed)
7677  * returns 0 when power is on (power-up not needed)
7678  */
7679 int
7680 pm_cfb_check_and_hold(void)
7681 {
7682 	/*
7683 	 * cfb_dip is set iff console is a power manageable frame buffer
7684 	 * device
7685 	 */
7686 	extern int modrootloaded;
7687 
7688 	mutex_enter(&pm_cfb_lock);
7689 	cfb_inuse++;
7690 	ASSERT(cfb_inuse);	/* wrap? */
7691 	if (modrootloaded && cfb_dip) {
7692 		/*
7693 		 * don't power down the frame buffer, the prom is using it
7694 		 */
7695 		if (pm_cfb_comps_off) {
7696 			mutex_exit(&pm_cfb_lock);
7697 			return (1);
7698 		}
7699 	}
7700 	mutex_exit(&pm_cfb_lock);
7701 	return (0);
7702 }
7703 
7704 /*
7705  * turn on cfb power (which is known to be off).
7706  * Must be called below lock level!
7707  */
7708 void
7709 pm_cfb_powerup(void)
7710 {
7711 	pm_info_t *info;
7712 	int norm;
7713 	int ccount, ci;
7714 	int unused;
7715 #ifdef DEBUG
7716 	/*
7717 	 * Can't reenter prom_prekern, so suppress pm debug messages
7718 	 * (still go to circular buffer).
7719 	 */
7720 	mutex_enter(&pm_debug_lock);
7721 	pm_divertdebug++;
7722 	mutex_exit(&pm_debug_lock);
7723 #endif
7724 	info = PM_GET_PM_INFO(cfb_dip);
7725 	ASSERT(info);
7726 
7727 	ccount = PM_NUMCMPTS(cfb_dip);
7728 	for (ci = 0; ci < ccount; ci++) {
7729 		norm = pm_get_normal_power(cfb_dip, ci);
7730 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7731 		    PM_CANBLOCK_BYPASS, 0, &unused);
7732 	}
7733 #ifdef DEBUG
7734 	mutex_enter(&pm_debug_lock);
7735 	pm_divertdebug--;
7736 	mutex_exit(&pm_debug_lock);
7737 #endif
7738 }
7739 
/*
 * Check if the console framebuffer is powered up.  If not, power it up.
 *
 * Note: the call to pm_cfb_check_and_hold() puts a hold on the power state,
 * which the caller must release with pm_cfb_rele() when the console fb
 * operation is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	if (pm_cfb_check_and_hold() != 0)
		pm_cfb_powerup();
}
7752 
7753 /*
7754  * Trigger a low level interrupt to power up console frame buffer.
7755  */
7756 void
7757 pm_cfb_trigger(void)
7758 {
7759 	if (cfb_dip == NULL)
7760 		return;
7761 
7762 	mutex_enter(&pm_cfb_lock);
7763 	/*
7764 	 * If machine appears to be hung, pulling the keyboard connector of
7765 	 * the console will cause a high level interrupt and go to debug_enter.
7766 	 * But, if the fb is powered down, this routine will be called to bring
7767 	 * it up (by generating a softint to do the work).  If soft interrupts
7768 	 * are not running, and the keyboard connector is pulled again, the
7769 	 * following code detects this condition and calls panic which allows
7770 	 * the fb to be brought up from high level.
7771 	 *
7772 	 * If two nearly simultaneous calls to debug_enter occur (both from
7773 	 * high level) the code described above will cause a panic.
7774 	 */
7775 	if (lbolt <= pm_soft_pending) {
7776 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7777 		panic(panicstr);	/* does a power up at any intr level */
7778 		/* NOTREACHED */
7779 	}
7780 	pm_soft_pending = lbolt;
7781 	mutex_exit(&pm_cfb_lock);
7782 	ddi_trigger_softintr(pm_soft_id);
7783 }
7784 
7785 major_t
7786 pm_path_to_major(char *path)
7787 {
7788 	PMD_FUNC(pmf, "path_to_major")
7789 	char *np, *ap, *bp;
7790 	major_t ret;
7791 	size_t len;
7792 	static major_t i_path_to_major(char *, char *);
7793 
7794 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7795 
7796 	np = strrchr(path, '/');
7797 	if (np != NULL)
7798 		np++;
7799 	else
7800 		np = path;
7801 	len = strlen(np) + 1;
7802 	bp = kmem_alloc(len, KM_SLEEP);
7803 	(void) strcpy(bp, np);
7804 	if ((ap = strchr(bp, '@')) != NULL) {
7805 		*ap = '\0';
7806 	}
7807 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7808 	ret = i_path_to_major(path, np);
7809 	kmem_free(bp, len);
7810 	return (ret);
7811 }
7812 
#ifdef DEBUG

/*
 * State of the pm debug message buffer:
 *	pm_msgbuf	start of the buffer (allocated on first use)
 *	pm_bufend	last byte of the buffer
 *	pm_msgp		where the next message will be written
 *	pm_logpages	size of the buffer, in pages
 */
char *pm_msgp;
char *pm_bufend;
char *pm_msgbuf = NULL;
int   pm_logpages = 2;

#define	PMLOGPGS	pm_logpages

/*
 * Format a debug message into the pm message buffer and, unless debug
 * output is being diverted (pm_divertdebug), also print it via prom_printf.
 *
 * Each message is stored with its terminating NUL.  When a message does not
 * fit in the space remaining, the rest of the buffer is zeroed and the
 * message is written at the start of the buffer instead.  A message larger
 * than the whole buffer is truncated to fit.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* First pass only measures the message (plus its NUL). */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	va_start(adx, fmt);
	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		/* Never write past the end of the buffer itself. */
		if (size > (size_t)(pm_bufend - pm_msgbuf))
			size = (size_t)(pm_bufend - pm_msgbuf);
		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
		/*
		 * The message now lives at the start of the buffer;
		 * pm_msgp still points at the old, just-zeroed position.
		 */
		if (!pm_divertdebug)
			prom_printf("%s", pm_msgbuf);
		pm_msgp = pm_msgbuf + size;
	} else {
		(void) vsnprintf(pm_msgp, size, fmt, adx);
		if (!pm_divertdebug)
			prom_printf("%s", pm_msgp);
		pm_msgp += size;
	}
	va_end(adx);
	mutex_exit(&pm_debug_lock);
}
#endif	/* DEBUG */
7855 
7856 /*
7857  * We want to save the state of any directly pm'd devices over the suspend/
7858  * resume process so that we can put them back the way the controlling
7859  * process left them.
7860  */
7861 void
7862 pm_save_direct_levels(void)
7863 {
7864 	pm_processes_stopped = 1;
7865 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7866 }
7867 
7868 static int
7869 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7870 {
7871 	_NOTE(ARGUNUSED(arg))
7872 	int i;
7873 	int *ip;
7874 	pm_info_t *info = PM_GET_PM_INFO(dip);
7875 
7876 	if (!info)
7877 		return (DDI_WALK_CONTINUE);
7878 
7879 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7880 		if (PM_NUMCMPTS(dip) > 2) {
7881 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7882 			    sizeof (int), KM_SLEEP);
7883 			ip = info->pmi_lp;
7884 		} else {
7885 			ip = info->pmi_levels;
7886 		}
7887 		/* autopm and processes are stopped, ok not to lock power */
7888 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7889 			*ip++ = PM_CURPOWER(dip, i);
7890 		/*
7891 		 * There is a small window between stopping the
7892 		 * processes and setting pm_processes_stopped where
7893 		 * a driver could get hung up in a pm_raise_power()
7894 		 * call.  Free any such driver now.
7895 		 */
7896 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7897 	}
7898 
7899 	return (DDI_WALK_CONTINUE);
7900 }
7901 
7902 void
7903 pm_restore_direct_levels(void)
7904 {
7905 	/*
7906 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7907 	 * threads failed) then we don't want to try to restore them
7908 	 */
7909 	if (!pm_processes_stopped)
7910 		return;
7911 
7912 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7913 	pm_processes_stopped = 0;
7914 }
7915 
7916 static int
7917 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7918 {
7919 	_NOTE(ARGUNUSED(arg))
7920 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7921 	int i, nc, result;
7922 	int *ip;
7923 
7924 	pm_info_t *info = PM_GET_PM_INFO(dip);
7925 	if (!info)
7926 		return (DDI_WALK_CONTINUE);
7927 
7928 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7929 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7930 			ip = &info->pmi_lp[nc - 1];
7931 		} else {
7932 			ip = &info->pmi_levels[nc - 1];
7933 		}
7934 		/*
7935 		 * Because fb drivers fail attempts to turn off the
7936 		 * fb when the monitor is on, but treat a request to
7937 		 * turn on the monitor as a request to turn on the
7938 		 * fb too, we process components in descending order
7939 		 * Because autopm is disabled and processes aren't
7940 		 * running, it is ok to examine current power outside
7941 		 * of the power lock
7942 		 */
7943 		for (i = nc - 1; i >= 0; i--, ip--) {
7944 			if (PM_CURPOWER(dip, i) == *ip)
7945 				continue;
7946 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7947 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
7948 				cmn_err(CE_WARN, "cpr: unable "
7949 				    "to restore power level of "
7950 				    "component %d of directly "
7951 				    "power manged device %s@%s"
7952 				    " to %d",
7953 				    i, PM_NAME(dip),
7954 				    PM_ADDR(dip), *ip);
7955 				PMD(PMD_FAIL, ("%s: failed to restore "
7956 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7957 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7958 				    PM_CURPOWER(dip, i), *ip, result))
7959 			}
7960 		}
7961 		if (nc > 2) {
7962 			kmem_free(info->pmi_lp, nc * sizeof (int));
7963 			info->pmi_lp = NULL;
7964 		}
7965 	}
7966 	return (DDI_WALK_CONTINUE);
7967 }
7968 
7969 /*
7970  * Stolen from the bootdev module
7971  * attempt to convert a path to a major number
7972  */
7973 static major_t
7974 i_path_to_major(char *path, char *leaf_name)
7975 {
7976 	extern major_t path_to_major(char *pathname);
7977 	major_t maj;
7978 
7979 	if ((maj = path_to_major(path)) == (major_t)-1) {
7980 		maj = ddi_name_to_major(leaf_name);
7981 	}
7982 
7983 	return (maj);
7984 }
7985 
7986 /*
7987  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
7988  * An entry in the list means that the device is detached, so we need to
7989  * adjust its ancestors as if they had just seen this attach, and any detached
7990  * ancestors need to have their list entries adjusted.
7991  */
7992 void
7993 pm_driver_removed(major_t major)
7994 {
7995 	static void i_pm_driver_removed(major_t major);
7996 
7997 	/*
7998 	 * Serialize removal of drivers. This is to keep ancestors of
7999 	 * a node that is being deleted from getting deleted and added back
8000 	 * with different counters.
8001 	 */
8002 	mutex_enter(&pm_remdrv_lock);
8003 	i_pm_driver_removed(major);
8004 	mutex_exit(&pm_remdrv_lock);
8005 }
8006 
8007 /*
8008  * This routine is called recursively by pm_noinvol_process_ancestors()
8009  */
8010 static void
8011 i_pm_driver_removed(major_t major)
8012 {
8013 	PMD_FUNC(pmf, "driver_removed")
8014 	static void adjust_ancestors(char *, int);
8015 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8016 	static void pm_noinvol_process_ancestors(char *);
8017 	pm_noinvol_t *ip, *pp = NULL;
8018 	int wasvolpmd;
8019 	ASSERT(major != (major_t)-1);
8020 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8021 again:
8022 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8023 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8024 		if (major != ip->ni_major)
8025 			continue;
8026 		/*
8027 		 * If it is an ancestor of no-invol node, which is
8028 		 * not removed, skip it. This is to cover the case of
8029 		 * ancestor removed without removing its descendants.
8030 		 */
8031 		if (pm_is_noinvol_ancestor(ip)) {
8032 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8033 			continue;
8034 		}
8035 		wasvolpmd = ip->ni_wasvolpmd;
8036 		/*
8037 		 * remove the entry from the list
8038 		 */
8039 		if (pp) {
8040 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8041 			    pmf, ip->ni_path, pp->ni_path))
8042 			pp->ni_next = ip->ni_next;
8043 		} else {
8044 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8045 			    ip->ni_path))
8046 			ASSERT(pm_noinvol_head == ip);
8047 			pm_noinvol_head = ip->ni_next;
8048 		}
8049 		rw_exit(&pm_noinvol_rwlock);
8050 		adjust_ancestors(ip->ni_path, wasvolpmd);
8051 		/*
8052 		 * Had an ancestor been removed before this node, it would have
8053 		 * been skipped. Adjust the no-invol counters for such skipped
8054 		 * ancestors.
8055 		 */
8056 		pm_noinvol_process_ancestors(ip->ni_path);
8057 		kmem_free(ip->ni_path, ip->ni_size);
8058 		kmem_free(ip, sizeof (*ip));
8059 		goto again;
8060 	}
8061 	rw_exit(&pm_noinvol_rwlock);
8062 }
8063 
8064 /*
8065  * returns 1, if *aip is a ancestor of a no-invol node
8066  *	   0, otherwise
8067  */
8068 static int
8069 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8070 {
8071 	pm_noinvol_t *ip;
8072 
8073 	ASSERT(strlen(aip->ni_path) != 0);
8074 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8075 		if (ip == aip)
8076 			continue;
8077 		/*
8078 		 * To be an ancestor, the path must be an initial substring of
8079 		 * the descendent, and end just before a '/' in the
8080 		 * descendent's path.
8081 		 */
8082 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8083 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8084 			return (1);
8085 	}
8086 	return (0);
8087 }
8088 
8089 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8090 /*
8091  * scan through the pm_noinvolpm list adjusting ancestors of the current
8092  * node;  Modifies string *path.
8093  */
8094 static void
8095 adjust_ancestors(char *path, int wasvolpmd)
8096 {
8097 	PMD_FUNC(pmf, "adjust_ancestors")
8098 	char *cp;
8099 	pm_noinvol_t *lp;
8100 	pm_noinvol_t *pp = NULL;
8101 	major_t locked = (major_t)UINT_MAX;
8102 	dev_info_t *dip;
8103 	char	*pathbuf;
8104 	size_t pathbuflen = strlen(path) + 1;
8105 
8106 	/*
8107 	 * First we look up the ancestor's dip.  If we find it, then we
8108 	 * adjust counts up the tree
8109 	 */
8110 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8111 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8112 	(void) strcpy(pathbuf, path);
8113 	cp = strrchr(pathbuf, '/');
8114 	if (cp == NULL)	{
8115 		/* if no ancestors, then nothing to do */
8116 		kmem_free(pathbuf, pathbuflen);
8117 		return;
8118 	}
8119 	*cp = '\0';
8120 	dip = pm_name_to_dip(pathbuf, 1);
8121 	if (dip != NULL) {
8122 		locked = PM_MAJOR(dip);
8123 
8124 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8125 		    path, dip);
8126 
8127 		if (locked != (major_t)UINT_MAX)
8128 			ddi_release_devi(dip);
8129 	} else {
8130 		char *apath;
8131 		size_t len = strlen(pathbuf) + 1;
8132 		int  lock_held = 1;
8133 
8134 		/*
8135 		 * Now check for ancestors that exist only in the list
8136 		 */
8137 		apath = kmem_alloc(len, KM_SLEEP);
8138 		(void) strcpy(apath, pathbuf);
8139 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8140 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8141 			/*
8142 			 * This can only happen once.  Since we have to drop
8143 			 * the lock, we need to extract the relevant info.
8144 			 */
8145 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8146 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8147 				    lp->ni_path, lp->ni_noinvolpm,
8148 				    lp->ni_noinvolpm - 1))
8149 				lp->ni_noinvolpm--;
8150 				if (wasvolpmd && lp->ni_volpmd) {
8151 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8152 					    "%d\n", pmf, lp->ni_path,
8153 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8154 					lp->ni_volpmd--;
8155 				}
8156 				/*
8157 				 * remove the entry from the list, if there
8158 				 * are no more no-invol descendants and node
8159 				 * itself is not a no-invol node.
8160 				 */
8161 				if (!(lp->ni_noinvolpm ||
8162 				    (lp->ni_flags & PMC_NO_INVOL))) {
8163 					ASSERT(lp->ni_volpmd == 0);
8164 					if (pp) {
8165 						PMD(PMD_NOINVOL, ("%s: freeing "
8166 						    "%s, prev is %s\n", pmf,
8167 						    lp->ni_path, pp->ni_path))
8168 						pp->ni_next = lp->ni_next;
8169 					} else {
8170 						PMD(PMD_NOINVOL, ("%s: free %s "
8171 						    "head\n", pmf, lp->ni_path))
8172 						ASSERT(pm_noinvol_head == lp);
8173 						pm_noinvol_head = lp->ni_next;
8174 					}
8175 					lock_held = 0;
8176 					rw_exit(&pm_noinvol_rwlock);
8177 					adjust_ancestors(apath, wasvolpmd);
8178 					/* restore apath */
8179 					(void) strcpy(apath, pathbuf);
8180 					kmem_free(lp->ni_path, lp->ni_size);
8181 					kmem_free(lp, sizeof (*lp));
8182 				}
8183 				break;
8184 			}
8185 		}
8186 		if (lock_held)
8187 			rw_exit(&pm_noinvol_rwlock);
8188 		adjust_ancestors(apath, wasvolpmd);
8189 		kmem_free(apath, len);
8190 	}
8191 	kmem_free(pathbuf, pathbuflen);
8192 }
8193 
8194 /*
8195  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8196  * which were skipped even though their drivers were removed.
8197  */
8198 static void
8199 pm_noinvol_process_ancestors(char *path)
8200 {
8201 	pm_noinvol_t *lp;
8202 
8203 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8204 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8205 		if (strstr(path, lp->ni_path) &&
8206 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8207 			rw_exit(&pm_noinvol_rwlock);
8208 			i_pm_driver_removed(lp->ni_major);
8209 			return;
8210 		}
8211 	}
8212 	rw_exit(&pm_noinvol_rwlock);
8213 }
8214 
8215 /*
8216  * Returns true if (detached) device needs to be kept up because it exported the
8217  * "no-involuntary-power-cycles" property or we're pretending it did (console
8218  * fb case) or it is an ancestor of such a device and has used up the "one
8219  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8220  * upon detach.  In any event, we need an exact hit on the path or we return
8221  * false.
8222  */
8223 int
8224 pm_noinvol_detached(char *path)
8225 {
8226 	PMD_FUNC(pmf, "noinvol_detached")
8227 	pm_noinvol_t *ip;
8228 	int ret = 0;
8229 
8230 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8231 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8232 		if (strcmp(path, ip->ni_path) == 0) {
8233 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8234 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8235 				    "%s\n", pmf, path))
8236 				ret = 1;
8237 				break;
8238 			}
8239 #ifdef	DEBUG
8240 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8241 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8242 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8243 				    path))
8244 #endif
8245 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8246 			break;
8247 		}
8248 	}
8249 	rw_exit(&pm_noinvol_rwlock);
8250 	return (ret);
8251 }
8252 
8253 int
8254 pm_is_cfb(dev_info_t *dip)
8255 {
8256 	return (dip == cfb_dip);
8257 }
8258 
#ifdef	DEBUG
/*
 * True when every component of the console frame buffer is at "normal"
 * (fully on) power, i.e. the count of powered-off console fb components
 * is zero.  When the console is not a frame buffer the answer is also
 * true.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8271 
8272 /*
8273  * Preventing scan from powering down the node by incrementing the
8274  * kidsupcnt.
8275  */
8276 void
8277 pm_hold_power(dev_info_t *dip)
8278 {
8279 	e_pm_hold_rele_power(dip, 1);
8280 }
8281 
8282 /*
8283  * Releasing the hold by decrementing the kidsupcnt allowing scan
8284  * to power down the node if all conditions are met.
8285  */
8286 void
8287 pm_rele_power(dev_info_t *dip)
8288 {
8289 	e_pm_hold_rele_power(dip, -1);
8290 }
8291 
8292 /*
8293  * A wrapper of pm_all_to_normal() to power up a dip
8294  * to its normal level
8295  */
8296 int
8297 pm_powerup(dev_info_t *dip)
8298 {
8299 	PMD_FUNC(pmf, "pm_powerup")
8300 
8301 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8302 	ASSERT(!(servicing_interrupt()));
8303 
8304 	/*
8305 	 * in case this node is not already participating pm
8306 	 */
8307 	if (!PM_GET_PM_INFO(dip)) {
8308 		if (!DEVI_IS_ATTACHING(dip))
8309 			return (DDI_SUCCESS);
8310 		if (pm_start(dip) != DDI_SUCCESS)
8311 			return (DDI_FAILURE);
8312 		if (!PM_GET_PM_INFO(dip))
8313 			return (DDI_SUCCESS);
8314 	}
8315 
8316 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8317 }
8318 
8319 int
8320 pm_rescan_walk(dev_info_t *dip, void *arg)
8321 {
8322 	_NOTE(ARGUNUSED(arg))
8323 
8324 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8325 		return (DDI_WALK_CONTINUE);
8326 
8327 	/*
8328 	 * Currently pm_cpr_callb/resume code is the only caller
8329 	 * and it needs to make sure that stopped scan get
8330 	 * reactivated. Otherwise, rescan walk needn't reactive
8331 	 * stopped scan.
8332 	 */
8333 	pm_scan_init(dip);
8334 
8335 	(void) pm_rescan(dip);
8336 	return (DDI_WALK_CONTINUE);
8337 }
8338 
8339 static dev_info_t *
8340 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8341 {
8342 	dev_info_t *wdip, *pdip;
8343 
8344 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8345 		pdip = ddi_get_parent(wdip);
8346 		if (pdip == dip)
8347 			return (wdip);
8348 	}
8349 	return (NULL);
8350 }
8351 
8352 int
8353 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8354     void *arg, void *result)
8355 {
8356 	PMD_FUNC(pmf, "bp_bus_power")
8357 	dev_info_t	*cdip;
8358 	pm_info_t	*cinfo;
8359 	pm_bp_child_pwrchg_t	*bpc;
8360 	pm_sp_misc_t		*pspm;
8361 	pm_bp_nexus_pwrup_t *bpn;
8362 	pm_bp_child_pwrchg_t new_bpc;
8363 	pm_bp_noinvol_t *bpi;
8364 	dev_info_t *tdip;
8365 	char *pathbuf;
8366 	int		ret = DDI_SUCCESS;
8367 	int		errno = 0;
8368 	pm_component_t *cp;
8369 
8370 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8371 	    pm_decode_op(op)))
8372 	switch (op) {
8373 	case BUS_POWER_CHILD_PWRCHG:
8374 		bpc = (pm_bp_child_pwrchg_t *)arg;
8375 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8376 		tdip = bpc->bpc_dip;
8377 		cdip = pm_get_next_descendent(dip, tdip);
8378 		cinfo = PM_GET_PM_INFO(cdip);
8379 		if (cdip != tdip) {
8380 			/*
8381 			 * If the node is an involved parent, it needs to
8382 			 * power up the node as it is needed.  There is nothing
8383 			 * else the framework can do here.
8384 			 */
8385 			if (PM_WANTS_NOTIFICATION(cdip)) {
8386 				PMD(PMD_SET, ("%s: call bus_power for "
8387 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8388 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8389 				    impl_arg, op, arg, result));
8390 			}
8391 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8392 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8393 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8394 			/*
8395 			 * we presume that the parent needs to be up in
8396 			 * order for the child to change state (either
8397 			 * because it must already be on if the child is on
8398 			 * (and the pm_all_to_normal_nexus() will be a nop)
8399 			 * or because it will need to be on for the child
8400 			 * to come on; so we make the call regardless
8401 			 */
8402 			pm_hold_power(cdip);
8403 			if (cinfo) {
8404 				pm_canblock_t canblock = pspm->pspm_canblock;
8405 				ret = pm_all_to_normal_nexus(cdip, canblock);
8406 				if (ret != DDI_SUCCESS) {
8407 					pm_rele_power(cdip);
8408 					return (ret);
8409 				}
8410 			}
8411 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8412 			    PM_DEVICE(cdip)))
8413 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8414 			    result);
8415 			pm_rele_power(cdip);
8416 		} else {
8417 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8418 			    result);
8419 		}
8420 		return (ret);
8421 
8422 	case BUS_POWER_NEXUS_PWRUP:
8423 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8424 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8425 
8426 		if (!e_pm_valid_info(dip, NULL) ||
8427 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8428 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8429 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8430 			    pmf, PM_DEVICE(dip)))
8431 			*pspm->pspm_errnop = EIO;
8432 			*(int *)result = DDI_FAILURE;
8433 			return (DDI_FAILURE);
8434 		}
8435 
8436 		ASSERT(bpn->bpn_dip == dip);
8437 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8438 		    PM_DEVICE(dip)))
8439 		new_bpc.bpc_dip = dip;
8440 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8441 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8442 		new_bpc.bpc_comp = bpn->bpn_comp;
8443 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8444 		new_bpc.bpc_nlevel = bpn->bpn_level;
8445 		new_bpc.bpc_private = bpn->bpn_private;
8446 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8447 		    PM_LEVEL_UPONLY;
8448 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8449 		    &errno;
8450 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8451 		    (void *)&new_bpc, result);
8452 		kmem_free(pathbuf, MAXPATHLEN);
8453 		return (ret);
8454 
8455 	case BUS_POWER_NOINVOL:
8456 		bpi = (pm_bp_noinvol_t *)arg;
8457 		tdip = bpi->bpni_dip;
8458 		cdip = pm_get_next_descendent(dip, tdip);
8459 
8460 		/* In case of rem_drv, the leaf node has been removed */
8461 		if (cdip == NULL)
8462 			return (DDI_SUCCESS);
8463 
8464 		cinfo = PM_GET_PM_INFO(cdip);
8465 		if (cdip != tdip) {
8466 			if (PM_WANTS_NOTIFICATION(cdip)) {
8467 				PMD(PMD_NOINVOL,
8468 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8469 				    pmf, PM_DEVICE(cdip)))
8470 				ret = (*PM_BUS_POWER_FUNC(cdip))
8471 				    (cdip, NULL, op, arg, result);
8472 				if ((cinfo) && (ret == DDI_SUCCESS))
8473 					(void) pm_noinvol_update_node(cdip,
8474 					    bpi);
8475 				return (ret);
8476 			} else {
8477 				PMD(PMD_NOINVOL,
8478 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8479 				    PM_DEVICE(cdip)))
8480 				ret = pm_busop_bus_power(cdip, NULL, op,
8481 				    arg, result);
8482 				/*
8483 				 * Update the current node.
8484 				 */
8485 				if ((cinfo) && (ret == DDI_SUCCESS))
8486 					(void) pm_noinvol_update_node(cdip,
8487 					    bpi);
8488 				return (ret);
8489 			}
8490 		} else {
8491 			/*
8492 			 * For attach, detach, power up:
8493 			 * Do nothing for leaf node since its
8494 			 * counts are already updated.
8495 			 * For CFB and driver removal, since the
8496 			 * path and the target dip passed in is up to and incl.
8497 			 * the immediate ancestor, need to do the update.
8498 			 */
8499 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8500 			    "reached\n", pmf, PM_DEVICE(cdip)))
8501 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8502 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8503 				(void) pm_noinvol_update_node(cdip, bpi);
8504 			return (DDI_SUCCESS);
8505 		}
8506 
8507 	default:
8508 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8509 		return (DDI_FAILURE);
8510 	}
8511 }
8512 
8513 static int
8514 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8515     void *arg, void *resultp)
8516 {
8517 	_NOTE(ARGUNUSED(impl_arg))
8518 	PMD_FUNC(pmf, "bp_set_power")
8519 	pm_ppm_devlist_t *devl;
8520 	int clevel, circ;
8521 #ifdef	DEBUG
8522 	int circ_db, ccirc_db;
8523 #endif
8524 	int ret = DDI_SUCCESS;
8525 	dev_info_t *cdip;
8526 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8527 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8528 	pm_canblock_t canblock = pspm->pspm_canblock;
8529 	int scan = pspm->pspm_scan;
8530 	int comp = bpc->bpc_comp;
8531 	int olevel = bpc->bpc_olevel;
8532 	int nlevel = bpc->bpc_nlevel;
8533 	int comps_off_incr = 0;
8534 	dev_info_t *pdip = ddi_get_parent(dip);
8535 	int dodeps;
8536 	int direction = pspm->pspm_direction;
8537 	int *errnop = pspm->pspm_errnop;
8538 #ifdef PMDDEBUG
8539 	char *dir = pm_decode_direction(direction);
8540 #endif
8541 	int *iresp = (int *)resultp;
8542 	time_t	idletime, thresh;
8543 	pm_component_t *cp = PM_CP(dip, comp);
8544 	int work_type;
8545 
8546 	*iresp = DDI_SUCCESS;
8547 	*errnop = 0;
8548 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8549 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8550 	    pm_decode_op(op)))
8551 
8552 	/*
8553 	 * The following set of conditions indicate we are here to handle a
8554 	 * driver's pm_[raise|lower]_power request, but the device is being
8555 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8556 	 * we want to pm_block and pass a status back to the caller based
8557 	 * on whether the controlling process's next activity on the device
8558 	 * matches the current request or not.  This distinction tells
8559 	 * downstream functions to avoid calling into a driver or changing
8560 	 * the framework's power state.  To actually block, we need:
8561 	 *
8562 	 * PM_ISDIRECT(dip)
8563 	 *	no reason to block unless a process is directly controlling dev
8564 	 * direction != PM_LEVEL_EXACT
8565 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8566 	 * !pm_processes_stopped
8567 	 *	don't block if controlling proc already be stopped for cpr
8568 	 * canblock != PM_CANBLOCK_BYPASS
8569 	 *	our caller must not have explicitly prevented blocking
8570 	 */
8571 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8572 		PM_LOCK_DIP(dip);
8573 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8574 			/* releases dip lock */
8575 			ret = pm_busop_match_request(dip, bpc);
8576 			if (ret == EAGAIN) {
8577 				PM_LOCK_DIP(dip);
8578 				continue;
8579 			}
8580 			return (*iresp = ret);
8581 		}
8582 		PM_UNLOCK_DIP(dip);
8583 	}
8584 	/* BC device is never scanned, so power will stick until we are done */
8585 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8586 	    direction != PM_LEVEL_DOWNONLY) {
8587 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8588 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8589 		    canblock, 0, resultp) != DDI_SUCCESS) {
8590 			/* *resultp set by pm_set_power */
8591 			return (DDI_FAILURE);
8592 		}
8593 	}
8594 	if (PM_WANTS_NOTIFICATION(pdip)) {
8595 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8596 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8597 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8598 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8599 		if (ret != DDI_SUCCESS) {
8600 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8601 			    pmf, PM_DEVICE(pdip)))
8602 			return (DDI_FAILURE);
8603 		}
8604 	} else {
8605 		/*
8606 		 * Since we don't know what the actual power level is,
8607 		 * we place a power hold on the parent no matter what
8608 		 * component and level is changing.
8609 		 */
8610 		pm_hold_power(pdip);
8611 	}
8612 	PM_LOCK_POWER(dip, &circ);
8613 	clevel = PM_CURPOWER(dip, comp);
8614 	/*
8615 	 * It's possible that a call was made to pm_update_maxpower()
8616 	 * on another thread before we took the lock above. So, we need to
8617 	 * make sure that this request isn't processed after the
8618 	 * change of power executed on behalf of pm_update_maxpower().
8619 	 */
8620 	if (nlevel > pm_get_normal_power(dip, comp)) {
8621 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8622 		    pmf))
8623 		ret = DDI_FAILURE;
8624 		*iresp = DDI_FAILURE;
8625 		goto post_notify;
8626 	}
8627 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8628 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8629 	    clevel, dir))
8630 	switch (direction) {
8631 	case PM_LEVEL_UPONLY:
8632 		/* Powering up */
8633 		if (clevel >= nlevel) {
8634 			PMD(PMD_SET, ("%s: current level is already "
8635 			    "at or above the requested level.\n", pmf))
8636 			*iresp = DDI_SUCCESS;
8637 			ret = DDI_SUCCESS;
8638 			goto post_notify;
8639 		}
8640 		break;
8641 	case PM_LEVEL_EXACT:
8642 		/* specific level request */
8643 		if (clevel == nlevel && !PM_ISBC(dip)) {
8644 			PMD(PMD_SET, ("%s: current level is already "
8645 			    "at the requested level.\n", pmf))
8646 			*iresp = DDI_SUCCESS;
8647 			ret = DDI_SUCCESS;
8648 			goto post_notify;
8649 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8650 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8651 			if (!pm_cfb_enabled) {
8652 				PMD(PMD_ERROR | PMD_CFB,
8653 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8654 				*errnop = EINVAL;
8655 				*iresp = DDI_FAILURE;
8656 				ret = DDI_FAILURE;
8657 				goto post_notify;
8658 			}
8659 			mutex_enter(&pm_cfb_lock);
8660 			while (cfb_inuse) {
8661 				mutex_exit(&pm_cfb_lock);
8662 				if (delay_sig(1) == EINTR) {
8663 					ret = DDI_FAILURE;
8664 					*iresp = DDI_FAILURE;
8665 					*errnop = EINTR;
8666 					goto post_notify;
8667 				}
8668 				mutex_enter(&pm_cfb_lock);
8669 			}
8670 			mutex_exit(&pm_cfb_lock);
8671 		}
8672 		break;
8673 	case PM_LEVEL_DOWNONLY:
8674 		/* Powering down */
8675 		thresh = cur_threshold(dip, comp);
8676 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8677 		if (scan && ((PM_KUC(dip) != 0) ||
8678 		    (cp->pmc_busycount > 0) ||
8679 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8680 #ifdef	DEBUG
8681 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8682 				PMD(PMD_SET, ("%s: scan failed: "
8683 				    "kidsupcnt != 0\n", pmf))
8684 			if (cp->pmc_busycount > 0)
8685 				PMD(PMD_SET, ("%s: scan failed: "
8686 				    "device become busy\n", pmf))
8687 			if (idletime < thresh)
8688 				PMD(PMD_SET, ("%s: scan failed: device "
8689 				    "hasn't been idle long enough\n", pmf))
8690 #endif
8691 			*iresp = DDI_FAILURE;
8692 			*errnop = EBUSY;
8693 			ret = DDI_FAILURE;
8694 			goto post_notify;
8695 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8696 			PMD(PMD_SET, ("%s: current level is already at "
8697 			    "or below the requested level.\n", pmf))
8698 			*iresp = DDI_SUCCESS;
8699 			ret = DDI_SUCCESS;
8700 			goto post_notify;
8701 		}
8702 		break;
8703 	}
8704 
8705 	if (PM_IS_CFB(dip) && (comps_off_incr =
8706 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8707 		/*
8708 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8709 		 * component from full power.  Remember that we tried to
8710 		 * lower power in case it fails and we need to back out
8711 		 * the adjustment.
8712 		 */
8713 		update_comps_off(comps_off_incr, dip);
8714 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8715 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8716 		    pm_cfb_comps_off))
8717 	}
8718 
8719 	if ((*iresp = power_dev(dip,
8720 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8721 #ifdef DEBUG
8722 		/*
8723 		 * All descendents of this node should already be powered off.
8724 		 */
8725 		if (PM_CURPOWER(dip, comp) == 0) {
8726 			pm_desc_pwrchk_t pdpchk;
8727 			pdpchk.pdpc_dip = dip;
8728 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8729 			ndi_devi_enter(dip, &circ_db);
8730 			for (cdip = ddi_get_child(dip); cdip != NULL;
8731 			    cdip = ddi_get_next_sibling(cdip)) {
8732 				ndi_devi_enter(cdip, &ccirc_db);
8733 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8734 				    (void *)&pdpchk);
8735 				ndi_devi_exit(cdip, ccirc_db);
8736 			}
8737 			ndi_devi_exit(dip, circ_db);
8738 		}
8739 #endif
8740 		/*
8741 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8742 		 * back up to full power.
8743 		 */
8744 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8745 			update_comps_off(comps_off_incr, dip);
8746 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8747 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8748 			    comp, clevel, nlevel, pm_cfb_comps_off))
8749 		}
8750 		dodeps = 0;
8751 		if (POWERING_OFF(clevel, nlevel)) {
8752 			if (PM_ISBC(dip)) {
8753 				dodeps = (comp == 0);
8754 			} else {
8755 				int i;
8756 				dodeps = 1;
8757 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8758 					/* if some component still on */
8759 					if (PM_CURPOWER(dip, i)) {
8760 						dodeps = 0;
8761 						break;
8762 					}
8763 				}
8764 			}
8765 			if (dodeps)
8766 				work_type = PM_DEP_WK_POWER_OFF;
8767 		} else if (POWERING_ON(clevel, nlevel)) {
8768 			if (PM_ISBC(dip)) {
8769 				dodeps = (comp == 0);
8770 			} else {
8771 				int i;
8772 				dodeps = 1;
8773 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8774 					if (i == comp)
8775 						continue;
8776 					if (PM_CURPOWER(dip, i) > 0) {
8777 						dodeps = 0;
8778 						break;
8779 					}
8780 				}
8781 			}
8782 			if (dodeps)
8783 				work_type = PM_DEP_WK_POWER_ON;
8784 		}
8785 
8786 		if (dodeps) {
8787 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8788 
8789 			(void) ddi_pathname(dip, pathbuf);
8790 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8791 			    PM_DEP_NOWAIT, NULL, 0);
8792 			kmem_free(pathbuf, MAXPATHLEN);
8793 		}
8794 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8795 			int old;
8796 
8797 			/* If old power cached during deadlock, use it. */
8798 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8799 			    cp->pmc_phc_pwr : olevel);
8800 			mutex_enter(&pm_rsvp_lock);
8801 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8802 			    old, canblock);
8803 			pm_enqueue_notify_others(&devl, canblock);
8804 			mutex_exit(&pm_rsvp_lock);
8805 		}
8806 
8807 		/*
8808 		 * If we are coming from a scan, don't do it again,
8809 		 * else we can have infinite loops.
8810 		 */
8811 		if (!scan)
8812 			pm_rescan(dip);
8813 	} else {
8814 		/* if we incremented pm_comps_off_count, but failed */
8815 		if (comps_off_incr > 0) {
8816 			update_comps_off(-comps_off_incr, dip);
8817 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8818 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8819 			    comp, clevel, nlevel, pm_cfb_comps_off))
8820 		}
8821 		*errnop = EIO;
8822 	}
8823 
8824 post_notify:
8825 	/*
8826 	 * This thread may have been in deadlock with pm_power_has_changed.
8827 	 * Before releasing power lock, clear the flag which marks this
8828 	 * condition.
8829 	 */
8830 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8831 
8832 	/*
8833 	 * Update the old power level in the bus power structure with the
8834 	 * actual power level before the transition was made to the new level.
8835 	 * Some involved parents depend on this information to keep track of
8836 	 * their children's power transition.
8837 	 */
8838 	if (*iresp != DDI_FAILURE)
8839 		bpc->bpc_olevel = clevel;
8840 
8841 	if (PM_WANTS_NOTIFICATION(pdip)) {
8842 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8843 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8844 		PM_UNLOCK_POWER(dip, circ);
8845 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8846 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8847 		    PM_DEVICE(dip), ret))
8848 	} else {
8849 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8850 		PM_UNLOCK_POWER(dip, circ);
8851 		/*
8852 		 * Now that we know what power transition has occurred
8853 		 * (if any), release the power hold.  Leave the hold
8854 		 * in effect in the case of OFF->ON transition.
8855 		 */
8856 		if (!(clevel == 0 && nlevel > 0 &&
8857 		    (!PM_ISBC(dip) || comp == 0)))
8858 			pm_rele_power(pdip);
8859 		/*
8860 		 * If the power transition was an ON->OFF transition,
8861 		 * remove the power hold from the parent.
8862 		 */
8863 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8864 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8865 			pm_rele_power(pdip);
8866 	}
8867 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8868 		return (DDI_FAILURE);
8869 	else
8870 		return (DDI_SUCCESS);
8871 }
8872 
8873 /*
8874  * If an app (SunVTS or Xsun) has taken control, then block until it
8875  * gives it up or makes the requested power level change, unless
8876  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8877  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8878  */
8879 static int
8880 pm_busop_match_request(dev_info_t *dip, void *arg)
8881 {
8882 	PMD_FUNC(pmf, "bp_match_request")
8883 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8884 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8885 	int comp = bpc->bpc_comp;
8886 	int nlevel = bpc->bpc_nlevel;
8887 	pm_canblock_t canblock = pspm->pspm_canblock;
8888 	int direction = pspm->pspm_direction;
8889 	int clevel, circ;
8890 
8891 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8892 	PM_LOCK_POWER(dip, &circ);
8893 	clevel = PM_CURPOWER(dip, comp);
8894 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8895 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8896 	if (direction == PM_LEVEL_UPONLY) {
8897 		if (clevel >= nlevel) {
8898 			PM_UNLOCK_POWER(dip, circ);
8899 			PM_UNLOCK_DIP(dip);
8900 			return (DDI_SUCCESS);
8901 		}
8902 	} else if (clevel == nlevel) {
8903 		PM_UNLOCK_POWER(dip, circ);
8904 		PM_UNLOCK_DIP(dip);
8905 		return (DDI_SUCCESS);
8906 	}
8907 	if (canblock == PM_CANBLOCK_FAIL) {
8908 		PM_UNLOCK_POWER(dip, circ);
8909 		PM_UNLOCK_DIP(dip);
8910 		return (DDI_FAILURE);
8911 	}
8912 	if (canblock == PM_CANBLOCK_BLOCK) {
8913 		/*
8914 		 * To avoid a deadlock, we must not hold the
8915 		 * power lock when we pm_block.
8916 		 */
8917 		PM_UNLOCK_POWER(dip, circ);
8918 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8919 		/* pm_block releases dip lock */
8920 		switch (pm_block(dip, comp, nlevel, clevel)) {
8921 		case PMP_RELEASE:
8922 			return (EAGAIN);
8923 		case PMP_SUCCEED:
8924 			return (DDI_SUCCESS);
8925 		case PMP_FAIL:
8926 			return (DDI_FAILURE);
8927 		}
8928 	} else {
8929 		ASSERT(0);
8930 	}
8931 	_NOTE(NOTREACHED);
8932 	return (DDI_FAILURE);	/* keep gcc happy */
8933 }
8934 
8935 static int
8936 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8937 {
8938 	PMD_FUNC(pmf, "all_to_normal_nexus")
8939 	int		*normal;
8940 	int		i, ncomps;
8941 	size_t		size;
8942 	int		changefailed = 0;
8943 	int		ret, result = DDI_SUCCESS;
8944 	pm_bp_nexus_pwrup_t	bpn;
8945 	pm_sp_misc_t	pspm;
8946 
8947 	ASSERT(PM_GET_PM_INFO(dip));
8948 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8949 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8950 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8951 		return (DDI_FAILURE);
8952 	}
8953 	ncomps = PM_NUMCMPTS(dip);
8954 	for (i = 0; i < ncomps; i++) {
8955 		bpn.bpn_dip = dip;
8956 		bpn.bpn_comp = i;
8957 		bpn.bpn_level = normal[i];
8958 		pspm.pspm_canblock = canblock;
8959 		pspm.pspm_scan = 0;
8960 		bpn.bpn_private = &pspm;
8961 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8962 		    (void *)&bpn, (void *)&result);
8963 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8964 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8965 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8966 			    i, normal[i], result))
8967 			changefailed++;
8968 		}
8969 	}
8970 	kmem_free(normal, size);
8971 	if (changefailed) {
8972 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8973 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8974 		return (DDI_FAILURE);
8975 	}
8976 	return (DDI_SUCCESS);
8977 }
8978 
8979 int
8980 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8981     dev_info_t *tdip)
8982 {
8983 	PMD_FUNC(pmf, "noinvol_update")
8984 	pm_bp_noinvol_t args;
8985 	int ret;
8986 	int result = DDI_SUCCESS;
8987 
8988 	args.bpni_path = path;
8989 	args.bpni_dip = tdip;
8990 	args.bpni_cmd = subcmd;
8991 	args.bpni_wasvolpmd = wasvolpmd;
8992 	args.bpni_volpmd = volpmd;
8993 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8994 	    "volpmd %d wasvolpmd %d\n", pmf,
8995 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
8996 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8997 	    &args, &result);
8998 	return (ret);
8999 }
9000 
9001 void
9002 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9003 {
9004 	PMD_FUNC(pmf, "noinvol_update_node")
9005 
9006 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9007 	switch (req->bpni_cmd) {
9008 	case PM_BP_NOINVOL_ATTACH:
9009 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9010 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9011 		    DEVI(dip)->devi_pm_noinvolpm,
9012 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9013 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9014 		PM_LOCK_DIP(dip);
9015 		DEVI(dip)->devi_pm_noinvolpm--;
9016 		if (req->bpni_wasvolpmd) {
9017 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9018 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9019 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9020 			    DEVI(dip)->devi_pm_volpmd - 1))
9021 			if (DEVI(dip)->devi_pm_volpmd)
9022 				DEVI(dip)->devi_pm_volpmd--;
9023 		}
9024 		PM_UNLOCK_DIP(dip);
9025 		break;
9026 
9027 	case PM_BP_NOINVOL_DETACH:
9028 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9029 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9030 		    DEVI(dip)->devi_pm_noinvolpm,
9031 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9032 		PM_LOCK_DIP(dip);
9033 		DEVI(dip)->devi_pm_noinvolpm++;
9034 		if (req->bpni_wasvolpmd) {
9035 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9036 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9037 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9038 			    DEVI(dip)->devi_pm_volpmd + 1))
9039 			DEVI(dip)->devi_pm_volpmd++;
9040 		}
9041 		PM_UNLOCK_DIP(dip);
9042 		break;
9043 
9044 	case PM_BP_NOINVOL_REMDRV:
9045 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9046 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9047 		    DEVI(dip)->devi_pm_noinvolpm,
9048 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9049 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9050 		PM_LOCK_DIP(dip);
9051 		DEVI(dip)->devi_pm_noinvolpm--;
9052 		if (req->bpni_wasvolpmd) {
9053 			PMD(PMD_NOINVOL,
9054 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9055 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9056 			    DEVI(dip)->devi_pm_volpmd,
9057 			    DEVI(dip)->devi_pm_volpmd - 1))
9058 			/*
9059 			 * A power up could come in between and
9060 			 * clear the volpmd, if that's the case,
9061 			 * volpmd would be clear.
9062 			 */
9063 			if (DEVI(dip)->devi_pm_volpmd)
9064 				DEVI(dip)->devi_pm_volpmd--;
9065 		}
9066 		PM_UNLOCK_DIP(dip);
9067 		break;
9068 
9069 	case PM_BP_NOINVOL_CFB:
9070 		PMD(PMD_NOINVOL,
9071 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9072 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9073 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9074 		PM_LOCK_DIP(dip);
9075 		DEVI(dip)->devi_pm_noinvolpm++;
9076 		PM_UNLOCK_DIP(dip);
9077 		break;
9078 
9079 	case PM_BP_NOINVOL_POWER:
9080 		PMD(PMD_NOINVOL,
9081 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9082 		    pmf, PM_DEVICE(dip),
9083 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9084 		    req->bpni_volpmd))
9085 		PM_LOCK_DIP(dip);
9086 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9087 		PM_UNLOCK_DIP(dip);
9088 		break;
9089 
9090 	default:
9091 		break;
9092 	}
9093 
9094 }
9095 
#ifdef DEBUG
/*
 * DEBUG-only walk callback: complain about any component of dip that is
 * at a non-zero power level.  arg is a pm_desc_pwrchk_t; judging by the
 * message text, pdpc_dip is the ancestor that is powering off.  The
 * complaint is a panic when pdpc_par_involved is zero and a warning
 * otherwise.  Nodes without pm info are skipped.  Always returns
 * DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int comp, level, severity;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off "
			    "while desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp,
			    level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9125 
9126 /*
9127  * Record the fact that one thread is borrowing the lock on a device node.
9128  * Use is restricted to the case where the lending thread will block until
9129  * the borrowing thread (always curthread) completes.
9130  */
9131 void
9132 pm_borrow_lock(kthread_t *lender)
9133 {
9134 	lock_loan_t *prev = &lock_loan_head;
9135 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9136 
9137 	cur->pmlk_borrower = curthread;
9138 	cur->pmlk_lender = lender;
9139 	mutex_enter(&pm_loan_lock);
9140 	cur->pmlk_next = prev->pmlk_next;
9141 	prev->pmlk_next = cur;
9142 	mutex_exit(&pm_loan_lock);
9143 }
9144 
9145 /*
9146  * Return the borrowed lock.  A thread can borrow only one.
9147  */
9148 void
9149 pm_return_lock(void)
9150 {
9151 	lock_loan_t *cur;
9152 	lock_loan_t *prev = &lock_loan_head;
9153 
9154 	mutex_enter(&pm_loan_lock);
9155 	ASSERT(prev->pmlk_next != NULL);
9156 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9157 		if (cur->pmlk_borrower == curthread)
9158 			break;
9159 
9160 	ASSERT(cur != NULL);
9161 	prev->pmlk_next = cur->pmlk_next;
9162 	mutex_exit(&pm_loan_lock);
9163 	kmem_free(cur, sizeof (*cur));
9164 }
9165