xref: /titanic_50/usr/src/uts/common/os/sunpm.c (revision b86efd96f8acd85ddaa930a2f0c1d664237e4aaf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the Voyager platform, is known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
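 *
 * As a sketch of the sequence described above (this mirrors what
 * power_dev() later in this file actually does; it is not a drop-in
 * fragment):
 *
 *	if (bc && comp == 0 && POWERING_OFF(old_level, level))
 *		(void) devi_detach(dip, DDI_PM_SUSPEND);
 *	...change the power level of component 0...
 *	if (bc && comp == 0 && POWERING_ON(old_level, level))
 *		(void) devi_attach(dip, DDI_PM_RESUME);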
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)). The exception to this rule is that power
65  * manageable CPU devices may be automatically managed independently of autopm
66  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
67  * ioctls) cpupm. If the CPU devices are not managed independently, then they
68  * are managed by autopm. In either case, for automatically power managed
69  * devices, all components are considered independent of each other, and it is
70  * up to the driver to decide when a transition requires saving or restoring
71  * hardware state.
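 *
 * For illustration, a disk driver's pm-components(9P) property might look
 * like the following (names and levels are only an example):
 *
 *	pm-components="NAME=spindle-motor", "0=off", "1=full-speed";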
72  *
73  * Each device component also has a threshold time associated with each power
74  * transition (see power.conf(4)), and a busy/idle state maintained by the
75  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
76  * Components are created idle.
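 *
 * A driver typically brackets hardware activity on a component with
 * pm_busy_component(9F) and pm_idle_component(9F), e.g. (sketch only):
 *
 *	(void) pm_busy_component(dip, 0);
 *	...access the hardware...
 *	(void) pm_idle_component(dip, 0);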
77  *
78  * The PM framework provides several functions:
79  * -implement PM policy as described in power.conf(4)
80  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
81  *  Policies consist of:
82  *    -set threshold values (defaults if none provided by pmconfig)
83  *    -set dependencies among devices
84  *    -enable/disable autopm
85  *    -enable/disable cpupm
86  *    -turn down idle components based on thresholds (if autopm or cpupm is
87  *     enabled) (aka scanning)
88  *    -maintain power states based on dependencies among devices
89  *    -upon request, or when the frame buffer powers off, attempt to turn off
90  *     all components that are idle or become idle over the next (10 sec)
91  *     period in an attempt to get down to an EnergyStar compliant state
92  *    -prevent powering off of a device which exported the
93  *     pm-no-involuntary-power-cycles property without active involvement of
94  *     the device's driver (so no removing power when the device driver is
95  *     not attached)
96  * -provide a mechanism for a device driver to request that a device's component
97  *  be brought back to the power level necessary for its use (see sketch below)
98  * -allow a process to directly control the power levels of device components
99  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
100  * -ensure that the console frame buffer is powered up before being referenced
101  *  via prom_printf() or other prom calls that might generate console output
102  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
103  * -provide "backwards compatible" behavior for devices without pm-components
104  *  property
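 *
 * The power-raising mechanism above is pm_raise_power(9F); a driver that
 * needs component 0 at some device-specific full level before an access
 * would do something like this (sketch; the level value is illustrative):
 *
 *	if (pm_raise_power(dip, 0, 1) != DDI_SUCCESS)
 *		return (EIO);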
105  *
106  * Scanning:
107  * Whenever autopm or cpupm is enabled, the framework attempts to bring each
108  * component of each managed device to its lowest power based on the threshold
109  * of idleness associated with each transition and the busy/idle state of the
110  * component.
111  *
112  * The actual work of this is done by pm_scan_dev(), which cycles through each
113  * component of a device, checking its idleness against its current threshold,
114  * and calling pm_set_power() as appropriate to change the power level.
115  * This function also indicates when it would next be profitable to scan the
116  * device again, and a new scan is scheduled after that time.
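 *
 * In code terms, the rescheduling amounts to what pm_scan() below does
 * once pm_scan_dev() returns the next scan time:
 *
 *	nextscan = pm_scan_dev(dip);
 *	if (nextscan != LONG_MAX)
 *		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
 *		    (clock_t)(nextscan * hz));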
117  *
118  * Dependencies:
119  * It is possible to establish a dependency between the power states of two
120  * otherwise unrelated devices.  This is currently done to ensure that the
121  * cdrom is always up whenever the console framebuffer is up, so that the user
122  * can insert a cdrom and see a popup as a result.
123  *
124  * The dependency terminology used in power.conf(4) is not easy to understand,
125  * so we've adopted a different terminology in the implementation.  We write
126  * of a "keeps up" and a "kept up" device.  A relationship can be established
127  * where one device keeps up another.  That means that if the keepsup device
128  * has any component that is at a non-zero power level, all components of the
129  * "kept up" device must be brought to full power.  This relationship is
130  * asynchronous.  When the keeping device is powered up, a request is queued
131  * to a worker thread to bring up the kept device.  The caller does not wait.
132  * Scan will not turn down a kept up device.
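 *
 * The stock power.conf(4) expresses the cdrom/framebuffer relationship
 * described above with an entry of this form (paths are illustrative):
 *
 *	device-dependency-property removable-media /dev/fb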
133  *
134  * Direct PM:
135  * A device may be directly power managed by a process.  If a device is
136  * directly pm'd, then it will not be scanned, and dependencies will not be
137  * enforced.  * If a directly pm'd device's driver requests a power change (via
138  * pm_raise_power(9F)), then the request is blocked and notification is sent
139  * to the controlling process, which must issue the requested power change for
140  * the driver to proceed.
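 *
 * A controlling process drives direct PM via ioctls on /dev/pm (see
 * usr/src/uts/common/io/pm.c); a hedged userland sketch, with the request
 * structure fields assumed from sys/pm.h and the path purely illustrative:
 *
 *	int fd = open("/dev/pm", O_RDWR);
 *	pm_req_t req;
 *	req.physpath = "/pci@0,0/disk@0";
 *	req.component = 0;
 *	(void) ioctl(fd, PM_DIRECT_PM, &req);	-- take direct control
 *	req.value = 0;				-- desired power level
 *	(void) ioctl(fd, PM_SET_CURRENT_POWER, &req);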
141  *
142  */
143 
144 #include <sys/types.h>
145 #include <sys/errno.h>
146 #include <sys/callb.h>		/* callback registration during CPR */
147 #include <sys/conf.h>		/* driver flags and functions */
148 #include <sys/open.h>		/* OTYP_CHR definition */
149 #include <sys/stat.h>		/* S_IFCHR definition */
150 #include <sys/pathname.h>	/* name -> dev_info xlation */
151 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
152 #include <sys/kmem.h>		/* memory alloc stuff */
153 #include <sys/debug.h>
154 #include <sys/archsystm.h>
155 #include <sys/pm.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/sunpm.h>
160 #include <sys/epm.h>
161 #include <sys/vfs.h>
162 #include <sys/mode.h>
163 #include <sys/mkdev.h>
164 #include <sys/promif.h>
165 #include <sys/consdev.h>
166 #include <sys/esunddi.h>
167 #include <sys/modctl.h>
168 #include <sys/fs/ufs_fs.h>
169 #include <sys/note.h>
170 #include <sys/taskq.h>
171 #include <sys/bootconf.h>
172 #include <sys/reboot.h>
173 #include <sys/spl.h>
174 #include <sys/disp.h>
175 #include <sys/sobject.h>
176 #include <sys/sunmdi.h>
177 
178 
179 /*
180  * PM LOCKING
181  *	The list of locks:
182  * Global pm mutex locks.
183  *
184  * pm_scan_lock:
185  *		It protects the timeout id of the scan thread, and the values
186  *		of autopm_enabled and cpupm.  This lock is not held
187  *		concurrently with any other PM locks.
188  *
189  * pm_clone_lock:	Protects the clone list and count of poll events
190  *		pending for the pm driver.
191  *		Lock ordering:
192  *			pm_clone_lock -> pm_pscc_interest_rwlock,
193  *			pm_clone_lock -> pm_pscc_direct_rwlock.
194  *
195  * pm_rsvp_lock:
196  *		Used to synchronize the data structures used for processes
197  *		to rendezvous with state change information when doing
198  *		direct PM.
199  *		Lock ordering:
200  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
201  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
202  *			pm_rsvp_lock -> pm_clone_lock.
203  *
204  * ppm_lock:	protects the list of registered ppm drivers
205  *		Lock ordering:
206  *			ppm_lock -> ppm driver unit_lock
207  *
208  * pm_compcnt_lock:
209  *		Protects count of components that are not at their lowest
210  *		power level.
211  *		Lock ordering:
212  *			pm_compcnt_lock -> ppm_lock.
213  *
214  * pm_dep_thread_lock:
215  *		Protects work list for pm_dep_thread.  Not taken concurrently
216  *		with any other pm lock.
217  *
218  * pm_remdrv_lock:
219  *		Serializes the operation of removing noinvol data structure
220  *		entries for a branch of the tree when a driver has been
221  *		removed from the system (modctl_rem_major).
222  *		Lock ordering:
223  *			pm_remdrv_lock -> pm_noinvol_rwlock.
224  *
225  * pm_cfb_lock: (High level spin lock)
226  *		Protects the count of how many components of the console
227  *		frame buffer are off (so we know if we have to bring up the
228  *		console as a result of a prom_printf, etc.)
229  *		No other locks are taken while holding this lock.
230  *
231  * pm_loan_lock:
232  *		Protects the lock_loan list.  List is used to record that one
233  *		thread has acquired a power lock but has launched another thread
234  *		to complete its processing.  An entry in the list indicates that
235  *		the worker thread can borrow the lock held by the other thread,
236  *		which must block on the completion of the worker.  Use is
237  *		specific to module loading.
238  *		No other locks are taken while holding this lock.
239  *
240  * Global PM rwlocks
241  *
242  * pm_thresh_rwlock:
243  *		Protects the list of thresholds recorded for future use (when
244  *		devices attach).
245  *		Lock ordering:
246  *			pm_thresh_rwlock -> devi_pm_lock
247  *
248  * pm_noinvol_rwlock:
249  *		Protects list of detached nodes that had noinvol registered.
250  *		No other PM locks are taken while holding pm_noinvol_rwlock.
251  *
252  * pm_pscc_direct_rwlock:
253  *		Protects the list that maps devices being directly power
254  *		managed to the processes that manage them.
255  *		Lock ordering:
256  *			pm_pscc_direct_rwlock -> psce_lock
257  *
258  * pm_pscc_interest_rwlock:
259  *		Protects the list that maps state change events to processes
260  *		that want to know about them.
261  *		Lock ordering:
262  *			pm_pscc_interest_rwlock -> psce_lock
263  *
264  * per-dip locks:
265  *
266  * Each node has these per-dip locks, which are only used if the device is
267  * a candidate for power management (e.g. has pm components)
268  *
269  * devi_pm_lock:
270  *		Protects all power management state of the node except for
271  *		power level, which is protected by ndi_devi_enter().
272  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
273  *		Lock ordering:
274  *			devi_pm_lock -> pm_rsvp_lock,
275  *			devi_pm_lock -> pm_dep_thread_lock,
276  *			devi_pm_lock -> pm_noinvol_rwlock,
277  *			devi_pm_lock -> power lock
278  *
279  * power lock (ndi_devi_enter()):
280  *		Since changing power level is possibly a slow operation (30
281  *		seconds to spin up a disk drive), this is locked separately.
282  *		Since a call into the driver to change the power level of one
283  *		component may result in a call back into the framework to change
284  *		the power level of another, this lock allows re-entrancy by
285  *		the same thread (ndi_devi_enter is used for this because
286  *		the USB framework uses ndi_devi_enter in its power entry point,
287  *		and use of any other lock would produce a deadlock).
288  *
289  * devi_pm_busy_lock:
290  *		This lock protects the integrity of the busy count.  It is
291  *		only taken by pm_busy_component() and pm_idle_component() and
292  *		some code that adjusts the busy time after the timer gets set
293  *		up or after a CPR operation.  It is per-dip to keep from
294  *		single-threading all the disk drivers on a system.
295  *		It could be per component instead, but most devices have
296  *		only one component.
297  *		No other PM locks are taken while holding this lock.
298  *
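 * As a concrete illustration of the per-dip locking, the pattern used
 * throughout this file is simply:
 *
 *	PM_LOCK_DIP(dip);
 *	...examine or modify the node's devi_pm_ state...
 *	PM_UNLOCK_DIP(dip);
 *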
299  */
300 
301 static int stdout_is_framebuffer;
302 static kmutex_t	e_pm_power_lock;
303 static kmutex_t pm_loan_lock;
304 kmutex_t	pm_scan_lock;
305 callb_id_t	pm_cpr_cb_id;
306 callb_id_t	pm_panic_cb_id;
307 callb_id_t	pm_halt_cb_id;
308 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
309 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
310 
311 clock_t pm_min_scan = PM_MIN_SCAN;
312 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
313 
314 static int pm_busop_set_power(dev_info_t *,
315     void *, pm_bus_power_op_t, void *, void *);
316 static int pm_busop_match_request(dev_info_t *, void *);
317 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
318 
319 /*
320  * Dependency processing is done through a separate thread.
321  */
322 kmutex_t	pm_dep_thread_lock;
323 kcondvar_t	pm_dep_thread_cv;
324 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
325 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
326 
327 /*
328  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
329  * power managing things in single user mode that have been suppressed via
330  * power.conf entries.  Protected by pm_scan_lock.
331  */
332 int		autopm_enabled;
333 
334 /*
335  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
336  * to define the power management behavior of CPU devices separate from
337  * autopm. Protected by pm_scan_lock.
338  */
339 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
340 
341 /*
342  * This flag is true while processes are stopped for a checkpoint/resume.
343  * Controlling processes of direct pm'd devices are not available to
344  * participate in power level changes, so we bypass them when this is set.
345  */
346 static int	pm_processes_stopped;
347 
348 #ifdef	DEBUG
349 
350 /*
351  * see common/sys/epm.h for PMD_* values
352  */
353 uint_t		pm_debug = 0;
354 
355 /*
356  * If pm_divertdebug is set, then no prom_printf calls will be made by
357  * PMD(), which will prevent debug output from bringing up the console
358  * frame buffer.  Clearing this variable before setting pm_debug will result
359  * in PMD output going to the console.
360  *
361  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
362  * deadlocks and decremented at the end of pm_set_power()
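 *
 * On a DEBUG kernel, pm_debug can be set from /etc/system, e.g. (the mask
 * value is illustrative; combine PMD_* values as needed):
 *	set pm:pm_debug=0x1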
363  */
364 uint_t		pm_divertdebug = 1;
365 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
366 
367 void prdeps(char *);
368 #endif
369 
370 /* Globals */
371 
372 /*
373  * List of recorded thresholds and dependencies
374  */
375 pm_thresh_rec_t *pm_thresh_head;
376 krwlock_t pm_thresh_rwlock;
377 
378 pm_pdr_t *pm_dep_head;
379 static int pm_unresolved_deps = 0;
380 static int pm_prop_deps = 0;
381 
382 /*
383  * List of devices that exported no-involuntary-power-cycles property
384  */
385 pm_noinvol_t *pm_noinvol_head;
386 
387 /*
388  * Locks used in noinvol processing
389  */
390 krwlock_t pm_noinvol_rwlock;
391 kmutex_t pm_remdrv_lock;
392 
393 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
394 int pm_system_idle_threshold;
395 int pm_cpu_idle_threshold;
396 
397 /*
398  * By default nexus has 0 threshold, and depends on its children to keep it up
399  */
400 int pm_default_nexus_threshold = 0;
401 
402 /*
403  * Data structures shared with common/io/pm.c
404  */
405 kmutex_t	pm_clone_lock;
406 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
407 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
408 unsigned char	pm_interest[PM_MAX_CLONE];
409 struct pollhead	pm_pollhead;
410 
411 extern int	hz;
412 extern char	*platform_module_list[];
413 
414 /*
415  * Wrappers for use in ddi_walk_devs
416  */
417 
418 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
419 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
420 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
421 static int		pm_discard_dep_walk(dev_info_t *, void *);
422 #ifdef DEBUG
423 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
424 #endif
425 
426 /*
427  * Routines for managing noinvol devices
428  */
429 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
430 void			pm_noinvol_update_node(dev_info_t *,
431 			    pm_bp_noinvol_t *req);
432 
433 kmutex_t pm_rsvp_lock;
434 kmutex_t pm_compcnt_lock;
435 krwlock_t pm_pscc_direct_rwlock;
436 krwlock_t pm_pscc_interest_rwlock;
437 
438 #define	PSC_INTEREST	0	/* belongs to interest psc list */
439 #define	PSC_DIRECT	1	/* belongs to direct psc list */
440 
441 pscc_t *pm_pscc_interest;
442 pscc_t *pm_pscc_direct;
443 
444 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
445 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
446 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
447 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
448 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
449 
450 #define	PM_INCR_NOTLOWEST(dip) {					\
451 	mutex_enter(&pm_compcnt_lock);					\
452 	if (!PM_IS_NEXUS(dip) ||					\
453 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
454 		if (pm_comps_notlowest == 0)				\
455 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
456 		pm_comps_notlowest++;					\
457 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
458 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
459 	}								\
460 	mutex_exit(&pm_compcnt_lock);					\
461 }
462 #define	PM_DECR_NOTLOWEST(dip) {					\
463 	mutex_enter(&pm_compcnt_lock);					\
464 	if (!PM_IS_NEXUS(dip) ||					\
465 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
466 		ASSERT(pm_comps_notlowest);				\
467 		pm_comps_notlowest--;					\
468 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
469 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
470 		if (pm_comps_notlowest == 0)				\
471 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
472 	}								\
473 	mutex_exit(&pm_compcnt_lock);					\
474 }
475 
476 /*
477  * console frame-buffer power-management is not enabled when
478  * debugging services are present.  To override, set pm_cfb_override
479  * to non-zero.
480  */
481 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
482 kmutex_t pm_cfb_lock;
483 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
484 #ifdef DEBUG
485 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
486 #else
487 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
488 #endif
489 
490 static dev_info_t *cfb_dip = 0;
491 static dev_info_t *cfb_dip_detaching = 0;
492 uint_t cfb_inuse = 0;
493 static ddi_softintr_t pm_soft_id;
494 static clock_t pm_soft_pending;
495 int	pm_scans_disabled = 0;
496 
497 /*
498  * A structure to record the fact that one thread has borrowed a lock held
499  * by another thread.  The context requires that the lender block on the
500  * completion of the borrower.
501  */
502 typedef struct lock_loan {
503 	struct lock_loan	*pmlk_next;
504 	kthread_t		*pmlk_borrower;
505 	kthread_t		*pmlk_lender;
506 	dev_info_t		*pmlk_dip;
507 } lock_loan_t;
508 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
509 
510 #ifdef	DEBUG
511 #define	PMD_FUNC(func, name)	char *(func) = (name);
512 #else
513 #define	PMD_FUNC(func, name)
514 #endif
515 
516 
517 /*
518  * Must be called before first device (including pseudo) attach
519  */
520 void
521 pm_init_locks(void)
522 {
523 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
524 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
525 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
526 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
527 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
528 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
529 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
530 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
531 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
532 }
533 
534 static boolean_t
535 pm_cpr_callb(void *arg, int code)
536 {
537 	_NOTE(ARGUNUSED(arg))
538 	static int auto_save;
539 	static pm_cpupm_t cpupm_save;
540 	static int pm_reset_timestamps(dev_info_t *, void *);
541 
542 	switch (code) {
543 	case CB_CODE_CPR_CHKPT:
544 		/*
545 		 * Cancel scan or wait for scan in progress to finish
546 		 * Other threads may be trying to restart the scan, so we
547 		 * have to keep at it until it sticks
548 		 */
549 		mutex_enter(&pm_scan_lock);
550 		ASSERT(!pm_scans_disabled);
551 		pm_scans_disabled = 1;
552 		auto_save = autopm_enabled;
553 		autopm_enabled = 0;
554 		cpupm_save = cpupm;
555 		cpupm = PM_CPUPM_NOTSET;
556 		mutex_exit(&pm_scan_lock);
557 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
558 		break;
559 
560 	case CB_CODE_CPR_RESUME:
561 		ASSERT(!autopm_enabled);
562 		ASSERT(cpupm == PM_CPUPM_NOTSET);
563 		ASSERT(pm_scans_disabled);
564 		pm_scans_disabled = 0;
565 		/*
566 		 * Call pm_reset_timestamps to reset timestamps of each
567 		 * device to the time when the system is resumed so that their
568 		 * idleness can be re-calculated. That's to keep devices from
569 		 * being powered down right after resume if the system was in
570 		 * suspended mode long enough.
571 		 */
572 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
573 
574 		autopm_enabled = auto_save;
575 		cpupm = cpupm_save;
576 		/*
577 		 * If there is any auto-pm device, get the scanning
578 		 * going. Otherwise don't bother.
579 		 */
580 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
581 		break;
582 	}
583 	return (B_TRUE);
584 }
585 
586 /*
587  * This callback routine is called when there is a system panic.  This function
588  * exists for prototype matching.
589  */
590 static boolean_t
591 pm_panic_callb(void *arg, int code)
592 {
593 	_NOTE(ARGUNUSED(arg, code))
594 	void pm_cfb_check_and_powerup(void);
595 	PMD(PMD_CFB, ("pm_panic_callb\n"))
596 	pm_cfb_check_and_powerup();
597 	return (B_TRUE);
598 }
599 
600 static boolean_t
601 pm_halt_callb(void *arg, int code)
602 {
603 	_NOTE(ARGUNUSED(arg, code))
604 	return (B_TRUE);	/* XXX for now */
605 }
606 
607 /*
608  * This needs to be called after the root and platform drivers are loaded
609  * and must be single-threaded with respect to driver attach/detach
610  */
611 void
612 pm_init(void)
613 {
614 	PMD_FUNC(pmf, "pm_init")
615 	char **mod;
616 	extern pri_t minclsyspri;
617 	static void pm_dep_thread(void);
618 
619 	pm_comps_notlowest = 0;
620 	pm_system_idle_threshold = pm_default_idle_threshold;
621 	pm_cpu_idle_threshold = 0;
622 
623 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
624 	    CB_CL_CPR_PM, "pm_cpr");
625 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
626 		    CB_CL_PANIC, "pm_panic");
627 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
628 		    CB_CL_HALT, "pm_halt");
629 
630 	/*
631 	 * Create a thread to do dependency processing.
632 	 */
633 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
634 	    TS_RUN, minclsyspri);
635 
636 	/*
637 	 * loadrootmodules already loaded these ppm drivers, now get them
638 	 * attached so they can claim the root drivers as they attach
639 	 */
640 	for (mod = platform_module_list; *mod; mod++) {
641 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
642 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
643 			    *mod);
644 		} else {
645 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
646 			    ddi_major_to_name(ddi_name_to_major(*mod))))
647 		}
648 	}
649 }
650 
651 /*
652  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
653  * enabled) when device becomes power managed or after a failed detach and
654  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
655  * a CPR resume to get all the devices scanning again.
656  */
657 void
658 pm_scan_init(dev_info_t *dip)
659 {
660 	PMD_FUNC(pmf, "scan_init")
661 	pm_scan_t	*scanp;
662 
663 	ASSERT(!PM_ISBC(dip));
664 
665 	PM_LOCK_DIP(dip);
666 	scanp = PM_GET_PM_SCAN(dip);
667 	if (!scanp) {
668 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
669 		    pmf, PM_DEVICE(dip)))
670 		scanp = kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
671 		DEVI(dip)->devi_pm_scan = scanp;
672 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
673 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
674 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
675 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
676 	}
677 	PM_UNLOCK_DIP(dip);
678 }
679 
680 /*
681  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
682  */
683 void
684 pm_scan_fini(dev_info_t *dip)
685 {
686 	PMD_FUNC(pmf, "scan_fini")
687 	pm_scan_t	*scanp;
688 
689 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
690 	ASSERT(!PM_ISBC(dip));
691 	PM_LOCK_DIP(dip);
692 	scanp = PM_GET_PM_SCAN(dip);
693 	if (!scanp) {
694 		PM_UNLOCK_DIP(dip);
695 		return;
696 	}
697 
698 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
699 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
700 
701 	kmem_free(scanp, sizeof (pm_scan_t));
702 	DEVI(dip)->devi_pm_scan = NULL;
703 	PM_UNLOCK_DIP(dip);
704 }
705 
706 /*
707  * Given a pointer to a component struct, return the current power level
708  * (struct contains index unless it is a continuous level).
709  * Located here in hopes of getting both this and dev_is_needed into the
710  * cache together
711  */
712 static int
713 cur_power(pm_component_t *cp)
714 {
715 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
716 		return (cp->pmc_cur_pwr);
717 
718 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
719 }
720 
721 static char *
722 pm_decode_direction(int direction)
723 {
724 	switch (direction) {
725 	case PM_LEVEL_UPONLY:
726 		return ("up");
727 
728 	case PM_LEVEL_EXACT:
729 		return ("exact");
730 
731 	case PM_LEVEL_DOWNONLY:
732 		return ("down");
733 
734 	default:
735 		return ("INVALID DIRECTION");
736 	}
737 }
738 
739 char *
740 pm_decode_op(pm_bus_power_op_t op)
741 {
742 	switch (op) {
743 	case BUS_POWER_CHILD_PWRCHG:
744 		return ("CHILD_PWRCHG");
745 	case BUS_POWER_NEXUS_PWRUP:
746 		return ("NEXUS_PWRUP");
747 	case BUS_POWER_PRE_NOTIFICATION:
748 		return ("PRE_NOTIFICATION");
749 	case BUS_POWER_POST_NOTIFICATION:
750 		return ("POST_NOTIFICATION");
751 	case BUS_POWER_HAS_CHANGED:
752 		return ("HAS_CHANGED");
753 	case BUS_POWER_NOINVOL:
754 		return ("NOINVOL");
755 	default:
756 		return ("UNKNOWN OP");
757 	}
758 }
759 
760 /*
761  * Returns true if level is a possible (valid) power level for component
762  */
763 int
764 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
765 {
766 	PMD_FUNC(pmf, "e_pm_valid_power")
767 	pm_component_t *cp = PM_CP(dip, cmpt);
768 	int i;
769 	int *ip = cp->pmc_comp.pmc_lvals;
770 	int limit = cp->pmc_comp.pmc_numlevels;
771 
772 	if (level < 0)
773 		return (0);
774 	for (i = 0; i < limit; i++) {
775 		if (level == *ip++)
776 			return (1);
777 	}
778 #ifdef DEBUG
779 	if (pm_debug & PMD_FAIL) {
780 		ip = cp->pmc_comp.pmc_lvals;
781 
782 		for (i = 0; i < limit; i++)
783 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
784 			    pmf, i, *ip++))
785 	}
786 #endif
787 	return (0);
788 }
789 
790 /*
791  * Returns true if device is pm'd (after calling pm_start if need be)
792  */
793 int
794 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
795 {
796 	pm_info_t *info;
797 	static int pm_start(dev_info_t *dip);
798 
799 	/*
800 	 * Check if the device is power managed, and start it if not.
801 	 * To make the common case (device is power managed already)
802 	 * fast, we check without the lock.  If the device is not already
803 	 * power managed, then we take the lock and the long route
804 	 * to go get it managed.  Devices never go unmanaged until they
805 	 * detach.
806 	 */
807 	info = PM_GET_PM_INFO(dip);
808 	if (!info) {
809 		if (!DEVI_IS_ATTACHING(dip)) {
810 			return (0);
811 		}
812 		if (pm_start(dip) != DDI_SUCCESS) {
813 			return (0);
814 		}
815 		info = PM_GET_PM_INFO(dip);
816 	}
817 	ASSERT(info);
818 	if (infop != NULL)
819 		*infop = info;
820 	return (1);
821 }
822 
823 int
824 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
825 {
826 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
827 		if (cpp != NULL)
828 			*cpp = PM_CP(dip, cmpt);
829 		return (1);
830 	} else {
831 		return (0);
832 	}
833 }
834 
835 /*
836  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
837  */
838 static int
839 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
840 {
841 	PMD_FUNC(pmf, "din")
842 	pm_component_t *cp;
843 	char *pathbuf;
844 	int result;
845 
846 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
847 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
848 	    !e_pm_valid_power(dip, cmpt, level))
849 		return (DDI_FAILURE);
850 
851 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
852 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
853 	    level, cur_power(cp)))
854 
855 	if (pm_set_power(dip, cmpt, level, direction,
856 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
857 		if (direction == PM_LEVEL_UPONLY) {
858 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
859 			(void) ddi_pathname(dip, pathbuf);
860 			cmn_err(CE_WARN, "Device %s failed to power up.",
861 			    pathbuf);
862 			kmem_free(pathbuf, MAXPATHLEN);
863 		}
864 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
865 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
866 		    pm_decode_direction(direction), level, result))
867 		return (DDI_FAILURE);
868 	}
869 
870 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
871 	    PM_DEVICE(dip)))
872 	pm_rescan(dip);
873 	return (DDI_SUCCESS);
874 }
875 
876 /*
877  * We can get multiple pm_rescan() threads; if one of them discovers
878  * that no scan is running at the moment, it kicks one into action.
879  * Otherwise, it tells the current scanning thread to scan again when
880  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
881  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
882  * thread at a time runs the pm_scan_dev() code.
883  */
884 void
885 pm_rescan(void *arg)
886 {
887 	PMD_FUNC(pmf, "rescan")
888 	dev_info_t	*dip = (dev_info_t *)arg;
889 	pm_info_t	*info;
890 	pm_scan_t	*scanp;
891 	timeout_id_t	scanid;
892 
893 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
894 	PM_LOCK_DIP(dip);
895 	info = PM_GET_PM_INFO(dip);
896 	scanp = PM_GET_PM_SCAN(dip);
897 	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
898 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
899 		PM_UNLOCK_DIP(dip);
900 		return;
901 	}
902 	if (scanp->ps_scan_flags & PM_SCANNING) {
903 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
904 		PM_UNLOCK_DIP(dip);
905 		return;
906 	} else if (scanp->ps_scan_id) {
907 		scanid = scanp->ps_scan_id;
908 		scanp->ps_scan_id = 0;
909 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
910 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
911 		PM_UNLOCK_DIP(dip);
912 		(void) untimeout(scanid);
913 		PM_LOCK_DIP(dip);
914 	}
915 
916 	/*
917 	 * Dispatching pm_scan during attach time is risky because attach
918 	 * might soon fail and the dip be dissolved, and a panic may happen
919 	 * while attempting to stop the scan. So schedule a pm_rescan instead.
920 	 * (Note that if either of the first two terms are true, taskq_dispatch
921 	 * will not be invoked).
922 	 *
923 	 * Multiple pm_scan dispatches are unnecessary and costly to keep track
924 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
925 	 * to regulate the dispatching.
926 	 *
927 	 * Scan is stopped before the device is detached (in pm_detaching())
928 	 * but it may get re-started during the post_detach processing if the
929 	 * driver fails to detach.
930 	 */
931 	if (DEVI_IS_ATTACHING(dip) ||
932 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
933 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
934 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
935 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
936 		if (scanp->ps_scan_id) {
937 			scanid = scanp->ps_scan_id;
938 			scanp->ps_scan_id = 0;
939 			PM_UNLOCK_DIP(dip);
940 			(void) untimeout(scanid);
941 			PM_LOCK_DIP(dip);
942 			if (scanp->ps_scan_id) {
943 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
944 				    "thread scheduled pm_rescan, scanid %lx\n",
945 				    pmf, PM_DEVICE(dip),
946 				    (ulong_t)scanp->ps_scan_id))
947 				PM_UNLOCK_DIP(dip);
948 				return;
949 			}
950 		}
951 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
952 		    (scanp->ps_idle_down ? pm_id_ticks :
953 		    (pm_min_scan * hz)));
954 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
955 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
956 		    (ulong_t)scanp->ps_scan_id))
957 	} else {
958 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
959 		    pmf, PM_DEVICE(dip)))
960 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
961 	}
962 	PM_UNLOCK_DIP(dip);
963 }
964 
965 void
966 pm_scan(void *arg)
967 {
968 	PMD_FUNC(pmf, "scan")
969 	dev_info_t	*dip = (dev_info_t *)arg;
970 	pm_scan_t	*scanp;
971 	time_t		nextscan;
972 
973 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
974 
975 	PM_LOCK_DIP(dip);
976 	scanp = PM_GET_PM_SCAN(dip);
977 	ASSERT(scanp && PM_GET_PM_INFO(dip));
978 
979 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
980 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
981 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
982 		PM_UNLOCK_DIP(dip);
983 		return;
984 	}
985 
986 	if (scanp->ps_idle_down) {
987 		/*
988 		 * make sure we remember idledown was in effect until
989 		 * we've completed the scan
990 		 */
991 		PMID_SET_SCANS(scanp->ps_idle_down)
992 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
993 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
994 	}
995 
996 	/* it is possible to have two threads running pm_scan() */
997 	if (scanp->ps_scan_flags & PM_SCANNING) {
998 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
999 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
1000 		    pmf, PM_DEVICE(dip)))
1001 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1002 		PM_UNLOCK_DIP(dip);
1003 		return;
1004 	}
1005 
1006 	scanp->ps_scan_flags |= PM_SCANNING;
1007 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1008 	do {
1009 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
1010 		PM_UNLOCK_DIP(dip);
1011 		nextscan = pm_scan_dev(dip);
1012 		PM_LOCK_DIP(dip);
1013 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
1014 
1015 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1016 	scanp->ps_scan_flags &= ~PM_SCANNING;
1017 
1018 	if (scanp->ps_idle_down) {
1019 		scanp->ps_idle_down &= ~PMID_SCANS;
1020 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1021 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1022 	}
1023 
1024 	/* schedule for next idle check */
1025 	if (nextscan != LONG_MAX) {
1026 		if (nextscan > (LONG_MAX / hz))
1027 			nextscan = (LONG_MAX - 1) / hz;
1028 		if (scanp->ps_scan_id) {
1029 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1030 			    "another rescan scheduled scanid(%lx)\n", pmf,
1031 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1032 			PM_UNLOCK_DIP(dip);
1033 			return;
1034 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1035 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1036 			    (clock_t)(nextscan * hz));
1037 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1038 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1039 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1040 		}
1041 	}
1042 	PM_UNLOCK_DIP(dip);
1043 }
1044 
1045 void
1046 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1047 {
1048 	int components = PM_NUMCMPTS(dip);
1049 	int i;
1050 
1051 	ASSERT(components > 0);
1052 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1053 	for (i = 0; i < components; i++) {
1054 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1055 	}
1056 	PM_UNLOCK_BUSY(dip);
1057 }
1058 
1059 /*
1060  * Returns true if device needs to be kept up because it exported the
1061  * "no-involuntary-power-cycles" property or we're pretending it did (console
1062  * fb case) or it is an ancestor of such a device and has used up the "one
1063  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1064  * upon detach
1065  */
1066 int
1067 pm_noinvol(dev_info_t *dip)
1068 {
1069 	PMD_FUNC(pmf, "noinvol")
1070 
1071 	/*
1072 	 * This doesn't change over the life of a driver, so no locking needed
1073 	 */
1074 	if (PM_IS_CFB(dip)) {
1075 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1076 		    pmf, PM_DEVICE(dip)))
1077 		return (1);
1078 	}
1079 	/*
1080 	 * Not an issue if no such kids
1081 	 */
1082 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1083 #ifdef DEBUG
1084 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1085 			dev_info_t *pdip = dip;
1086 			do {
1087 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1088 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1089 				    DEVI(pdip)->devi_pm_noinvolpm,
1090 				    DEVI(pdip)->devi_pm_volpmd))
1091 				pdip = ddi_get_parent(pdip);
1092 			} while (pdip);
1093 		}
1094 #endif
1095 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1096 		return (0);
1097 	}
1098 
1099 	/*
1100 	 * Since we now maintain the counts correctly at every node, we no longer
1101 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1102 	 * without the children getting their counts adjusted.
1103 	 */
1104 
1105 #ifdef	DEBUG
1106 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1107 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1108 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1109 		    PM_DEVICE(dip)))
1110 #endif
1111 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1112 }
1113 
1114 /*
1115  * This function performs the actual scanning of the device.
1116  * It attempts to power off the indicated device's components if they have
1117  * been idle and other restrictions are met.
1118  * pm_scan_dev calculates and returns when the next scan should happen for
1119  * this device.
1120  */
1121 time_t
1122 pm_scan_dev(dev_info_t *dip)
1123 {
1124 	PMD_FUNC(pmf, "scan_dev")
1125 	pm_scan_t	*scanp;
1126 	time_t		*timestamp, idletime, now, thresh;
1127 	time_t		timeleft = 0;
1128 	int		i, nxtpwr, curpwr, pwrndx, unused;
1129 	size_t		size;
1130 	pm_component_t	 *cp;
1131 	dev_info_t	*pdip = ddi_get_parent(dip);
1132 	int		circ;
1133 	static int	cur_threshold(dev_info_t *, int);
1134 	static int	pm_next_lower_power(pm_component_t *, int);
1135 
1136 	/*
1137 	 * skip an attaching device
1138 	 */
1139 	if (DEVI_IS_ATTACHING(dip)) {
1140 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1141 		    pmf, PM_DEVICE(dip), pm_min_scan))
1142 		return (pm_min_scan);
1143 	}
1144 
1145 	PM_LOCK_DIP(dip);
1146 	scanp = PM_GET_PM_SCAN(dip);
1147 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1148 
1149 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1150 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1151 	    PM_KUC(dip)))
1152 
1153 	/* no scan under the following conditions */
1154 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1155 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1156 	    (PM_KUC(dip) != 0) ||
1157 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1158 		PM_UNLOCK_DIP(dip);
1159 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1160 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1161 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1162 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1163 		    cpupm, PM_KUC(dip),
1164 		    PM_ISDIRECT(dip) ? "is" : "is not",
1165 		    pm_noinvol(dip) ? "is" : "is not"))
1166 		return (LONG_MAX);
1167 	}
1168 	PM_UNLOCK_DIP(dip);
1169 
1170 	if (!ndi_devi_tryenter(pdip, &circ)) {
1171 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1172 		    pmf, PM_DEVICE(pdip)))
1173 		return ((time_t)1);
1174 	}
1175 	now = gethrestime_sec();
1176 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1177 	timestamp = kmem_alloc(size, KM_SLEEP);
1178 	pm_get_timestamps(dip, timestamp);
1179 
1180 	/*
1181 	 * Since we removed support for backwards compatible devices
1182 	 * (see big comment at top of file),
1183 	 * it is no longer required to deal with component 0 last.
1184 	 */
1185 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1186 		/*
1187 		 * If already off (an optimization, perhaps)
1188 		 */
1189 		cp = PM_CP(dip, i);
1190 		pwrndx = cp->pmc_cur_pwr;
1191 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1192 		    PM_LEVEL_UNKNOWN :
1193 		    cp->pmc_comp.pmc_lvals[pwrndx];
1194 
1195 		if (pwrndx == 0) {
1196 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1197 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1198 			/* skip device if off or at its lowest */
1199 			continue;
1200 		}
1201 
1202 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1203 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1204 			/* were busy or newly became busy by another thread */
1205 			if (timeleft == 0)
1206 				timeleft = max(thresh, pm_min_scan);
1207 			else
1208 				timeleft = min(
1209 				    timeleft, max(thresh, pm_min_scan));
1210 			continue;
1211 		}
1212 
1213 		idletime = now - timestamp[i];		/* idle time */
1214 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1215 		    pmf, PM_DEVICE(dip), i, idletime))
1216 		if (idletime >= thresh || PM_IS_PID(dip)) {
1217 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1218 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1219 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1220 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1221 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1222 			    PM_CURPOWER(dip, i) != nxtpwr) {
1223 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1224 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1225 				    i, curpwr, nxtpwr))
1226 				timeleft = pm_min_scan;
1227 				continue;
1228 			} else {
1229 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1230 				    "%d->%d, GOOD curpwr %d\n", pmf,
1231 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1232 				    cur_power(cp)))
1233 
1234 				if (nxtpwr == 0)	/* component went off */
1235 					continue;
1236 
1237 				/*
1238 				 * scan to next lower level
1239 				 */
1240 				if (timeleft == 0)
1241 					timeleft = max(
1242 					    1, cur_threshold(dip, i));
1243 				else
1244 					timeleft = min(timeleft,
1245 					    max(1, cur_threshold(dip, i)));
1246 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1247 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1248 				    i, timeleft))
1249 			}
1250 		} else {	/* comp not idle long enough */
1251 			if (timeleft == 0)
1252 				timeleft = thresh - idletime;
1253 			else
1254 				timeleft = min(timeleft, (thresh - idletime));
1255 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1256 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1257 		}
1258 	}
1259 	ndi_devi_exit(pdip, circ);
1260 	kmem_free(timestamp, size);
1261 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1262 	    PM_DEVICE(dip), timeleft))
1263 
1264 	/*
1265 	 * if components are already at lowest level, timeleft is left 0
1266 	 */
1267 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1268 }
1269 
1270 /*
1271  * pm_scan_stop - cancel scheduled pm_rescan,
1272  *                wait for termination of dispatched pm_scan thread
1273  *                     and active pm_scan_dev thread.
1274  */
1275 void
1276 pm_scan_stop(dev_info_t *dip)
1277 {
1278 	PMD_FUNC(pmf, "scan_stop")
1279 	pm_scan_t	*scanp;
1280 	timeout_id_t	scanid;
1281 
1282 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1283 	PM_LOCK_DIP(dip);
1284 	scanp = PM_GET_PM_SCAN(dip);
1285 	if (!scanp) {
1286 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1287 		    pmf, PM_DEVICE(dip)))
1288 		PM_UNLOCK_DIP(dip);
1289 		return;
1290 	}
1291 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1292 
1293 	/* cancel scheduled scan taskq */
1294 	while (scanp->ps_scan_id) {
1295 		scanid = scanp->ps_scan_id;
1296 		scanp->ps_scan_id = 0;
1297 		PM_UNLOCK_DIP(dip);
1298 		(void) untimeout(scanid);
1299 		PM_LOCK_DIP(dip);
1300 	}
1301 
1302 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1303 		PM_UNLOCK_DIP(dip);
1304 		delay(1);
1305 		PM_LOCK_DIP(dip);
1306 	}
1307 	PM_UNLOCK_DIP(dip);
1308 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1309 }
1310 
1311 int
1312 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1313 {
1314 	_NOTE(ARGUNUSED(arg))
1315 
1316 	if (!PM_GET_PM_SCAN(dip))
1317 		return (DDI_WALK_CONTINUE);
1318 	ASSERT(!PM_ISBC(dip));
1319 	pm_scan_stop(dip);
1320 	return (DDI_WALK_CONTINUE);
1321 }
1322 
1323 /*
1324  * Converts a power level value to its index
1325  */
1326 static int
1327 power_val_to_index(pm_component_t *cp, int val)
1328 {
1329 	int limit, i, *ip;
1330 
1331 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1332 	    val != PM_LEVEL_EXACT);
1333 	/*  convert power value into index (i) */
1334 	limit = cp->pmc_comp.pmc_numlevels;
1335 	ip = cp->pmc_comp.pmc_lvals;
1336 	for (i = 0; i < limit; i++)
1337 		if (val == *ip++)
1338 			return (i);
1339 	return (-1);
1340 }
1341 
1342 /*
1343  * Converts a numeric power level to a printable string
1344  */
1345 static char *
1346 power_val_to_string(pm_component_t *cp, int val)
1347 {
1348 	int index;
1349 
1350 	if (val == PM_LEVEL_UPONLY)
1351 		return ("<UPONLY>");
1352 
1353 	if (val == PM_LEVEL_UNKNOWN ||
1354 	    (index = power_val_to_index(cp, val)) == -1)
1355 		return ("<LEVEL_UNKNOWN>");
1356 
1357 	return (cp->pmc_comp.pmc_lnames[index]);
1358 }
1359 
1360 /*
1361  * Return true if this node has been claimed by a ppm.
1362  */
1363 static int
1364 pm_ppm_claimed(dev_info_t *dip)
1365 {
1366 	return (PPM(dip) != NULL);
1367 }
1368 
1369 /*
1370  * A node which was voluntarily power managed has just used up its "free cycle"
1371  * and needs its volpmd field cleared, and the same done to all its descendants
1372  */
1373 static void
1374 pm_clear_volpm_dip(dev_info_t *dip)
1375 {
1376 	PMD_FUNC(pmf, "clear_volpm_dip")
1377 
1378 	if (dip == NULL)
1379 		return;
1380 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1381 	    PM_DEVICE(dip)))
1382 	DEVI(dip)->devi_pm_volpmd = 0;
1383 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1384 		pm_clear_volpm_dip(dip);
1385 	}
1386 }
1387 
1388 /*
1389  * A node which was voluntarily power managed has used up the "free cycles"
1390  * for the subtree that it is the root of.  Scan through the list of detached
1391  * nodes and adjust the counts of any that are descendants of the node.
1392  */
1393 static void
1394 pm_clear_volpm_list(dev_info_t *dip)
1395 {
1396 	PMD_FUNC(pmf, "clear_volpm_list")
1397 	char	*pathbuf;
1398 	size_t	len;
1399 	pm_noinvol_t *ip;
1400 
1401 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1402 	(void) ddi_pathname(dip, pathbuf);
1403 	len = strlen(pathbuf);
1404 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1405 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1406 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1407 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1408 		    ip->ni_path))
1409 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1410 		    ip->ni_path[len] == '/') {
1411 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1412 			    ip->ni_path))
1413 			ip->ni_volpmd = 0;
1414 			ip->ni_wasvolpmd = 0;
1415 		}
1416 	}
1417 	kmem_free(pathbuf, MAXPATHLEN);
1418 	rw_exit(&pm_noinvol_rwlock);
1419 }
1420 
1421 /*
1422  * Powers a device, suspending or resuming the driver if it is a backward
1423  * compatible device, calling into ppm to change power level.
1424  * Called with the component's power lock held.
1425  */
1426 static int
1427 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1428     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1429 {
1430 	PMD_FUNC(pmf, "power_dev")
1431 	power_req_t power_req;
1432 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1433 	int		resume_needed = 0;
1434 	int		suspended = 0;
1435 	int		result;
1436 	struct pm_component *cp = PM_CP(dip, comp);
1437 	int		bc = PM_ISBC(dip);
1438 	int pm_all_components_off(dev_info_t *);
1439 	int		clearvolpmd = 0;
1440 	char		pathbuf[MAXNAMELEN];
1441 #ifdef DEBUG
1442 	char *ppmname, *ppmaddr;
1443 #endif
1444 	/*
1445 	 * If this is comp 0 of a backwards compat device and we are
1446 	 * going to take the power away, we need to detach it with
1447 	 * DDI_PM_SUSPEND command.
1448 	 */
1449 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1450 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1451 			/* We could not suspend before turning cmpt zero off */
1452 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1453 			    pmf, PM_DEVICE(dip)))
1454 			return (DDI_FAILURE);
1455 		} else {
1456 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1457 			suspended++;
1458 		}
1459 	}
1460 	power_req.request_type = PMR_PPM_SET_POWER;
1461 	power_req.req.ppm_set_power_req.who = dip;
1462 	power_req.req.ppm_set_power_req.cmpt = comp;
1463 	power_req.req.ppm_set_power_req.old_level = old_level;
1464 	power_req.req.ppm_set_power_req.new_level = level;
1465 	power_req.req.ppm_set_power_req.canblock = canblock;
1466 	power_req.req.ppm_set_power_req.cookie = NULL;
1467 #ifdef DEBUG
1468 	if (pm_ppm_claimed(dip)) {
1469 		ppmname = PM_NAME(PPM(dip));
1470 		ppmaddr = PM_ADDR(PPM(dip));
1471 
1472 	} else {
1473 		ppmname = "noppm";
1474 		ppmaddr = "0";
1475 	}
1476 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1477 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1478 	    power_val_to_string(cp, old_level), old_level,
1479 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1480 #endif
1481 	/*
1482 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1483 	 * bc device comp 0 is powering on, then we count it as a power cycle
1484 	 * against its voluntary count.
1485 	 */
1486 	if (DEVI(dip)->devi_pm_volpmd &&
1487 	    ((!bc && pm_all_components_off(dip) && level != 0) ||
1488 	    (bc && comp == 0 && POWERING_ON(old_level, level))))
1489 		clearvolpmd = 1;
1490 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1491 	    &power_req, &result)) == DDI_SUCCESS) {
1492 		/*
1493 		 * Now do involuntary pm accounting; if we've just cycled power
1494 		 * on a voluntarily pm'd node, and by inference on its entire
1495 		 * subtree, we need to set the subtree (including those nodes
1496 		 * already detached) volpmd counts to 0, and subtract out the
1497 		 * value of the current node's volpmd count from the ancestors
1498 		 */
1499 		if (clearvolpmd) {
1500 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1501 			pm_clear_volpm_dip(dip);
1502 			pm_clear_volpm_list(dip);
1503 			if (volpmd) {
1504 				(void) ddi_pathname(dip, pathbuf);
1505 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1506 				    volpmd, 0, pathbuf, dip);
1507 			}
1508 		}
1509 	} else {
1510 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1511 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1512 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1513 	}
1514 	/*
1515 	 * If some other devices were also powered up (e.g. other cpus in
1516 	 * the same domain) return a pointer to that list
1517 	 */
1518 	if (devlist) {
1519 		*devlist = (pm_ppm_devlist_t *)
1520 		    power_req.req.ppm_set_power_req.cookie;
1521 	}
1522 	/*
1523 	 * We will have to resume the device if it is a backwards compat
1524 	 * device and either of the following is true:
1525 	 * -This is comp 0 and we have successfully powered it up
1526 	 * -This is comp 0 and we have failed to power it down. Resume is
1527 	 *  needed because we have suspended it above
1528 	 */
1529 
1530 	if (bc && comp == 0) {
1531 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1532 		if (power_op_ret == DDI_SUCCESS) {
1533 			if (POWERING_ON(old_level, level)) {
1534 				/*
1535 				 * It must be either suspended or resumed
1536 				 * via pm_power_has_changed path
1537 				 */
1538 				ASSERT((DEVI(dip)->devi_pm_flags &
1539 				    PMC_SUSPENDED) ||
1540 				    (PM_CP(dip, comp)->pmc_flags &
1541 				    PM_PHC_WHILE_SET_POWER));
1542 
1543 				resume_needed = suspended;
1544 			}
1545 		} else {
1546 			if (POWERING_OFF(old_level, level)) {
1547 				/*
1548 				 * It must be either suspended or resumed
1549 				 * via pm_power_has_changed path
1550 				 */
1551 				ASSERT((DEVI(dip)->devi_pm_flags &
1552 				    PMC_SUSPENDED) ||
1553 				    (PM_CP(dip, comp)->pmc_flags &
1554 				    PM_PHC_WHILE_SET_POWER));
1555 
1556 				resume_needed = suspended;
1557 			}
1558 		}
1559 	}
1560 	if (resume_needed) {
1561 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1562 		/* ppm is not interested in DDI_PM_RESUME */
1563 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1564 		    DDI_SUCCESS) {
1565 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1566 		} else
1567 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1568 			    PM_DEVICE(dip));
1569 	}
1570 	return (power_op_ret);
1571 }
1572 
1573 /*
1574  * Return true if we are the owner or a borrower of the devi lock.  See
1575  * pm_lock_power_single() about borrowing the lock.
1576  */
1577 static int
1578 pm_devi_lock_held(dev_info_t *dip)
1579 {
1580 	lock_loan_t *cur;
1581 
1582 	if (DEVI_BUSY_OWNED(dip))
1583 		return (1);
1584 
1585 	/* return false if no locks borrowed */
1586 	if (lock_loan_head.pmlk_next == NULL)
1587 		return (0);
1588 
1589 	mutex_enter(&pm_loan_lock);
1590 	/* see if our thread is registered as a lock borrower. */
1591 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1592 		if (cur->pmlk_borrower == curthread)
1593 			break;
1594 	mutex_exit(&pm_loan_lock);
1595 
1596 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1597 }
1598 
1599 /*
1600  * pm_set_power: adjusts power level of device.	 Assumes device is power
1601  * manageable & component exists.
1602  *
1603  * Cases which require us to bring up devices we keep up ("wekeepups") for
1604  * backwards compatible devices:
1605  *	component 0 is off and we're bringing it up from 0
1606  *		bring up wekeepup first
1607  *	and recursively when component 0 is off and we bring some other
1608  *	component up from 0
1609  * For devices which are not backward compatible, our dependency notion is much
1610  * simpler:  unless all components are off, the wekeeps must be on.
1611  * We don't treat component 0 differently.
1612  * Canblock tells how to deal with a direct pm'd device.
1613  * Scan arg tells us if we were called from scan, in which case we don't need
1614  * to go back to the root node and walk down to change power.
1615  */
1616 int
1617 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1618     pm_canblock_t canblock, int scan, int *retp)
1619 {
1620 	PMD_FUNC(pmf, "set_power")
1621 	char		*pathbuf;
1622 	pm_bp_child_pwrchg_t bpc;
1623 	pm_sp_misc_t	pspm;
1624 	int		ret = DDI_SUCCESS;
1625 	int		unused = DDI_SUCCESS;
1626 	dev_info_t	*pdip = ddi_get_parent(dip);
1627 
1628 #ifdef DEBUG
1629 	int		diverted = 0;
1630 
1631 	/*
1632 	 * This prevents operations on the console from calling prom_printf and
1633 	 * either deadlocking or bringing up the console because of debug
1634 	 * output
1635 	 */
1636 	if (dip == cfb_dip) {
1637 		diverted++;
1638 		mutex_enter(&pm_debug_lock);
1639 		pm_divertdebug++;
1640 		mutex_exit(&pm_debug_lock);
1641 	}
1642 #endif
1643 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1644 	    direction == PM_LEVEL_EXACT);
1645 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1646 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1647 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1648 	(void) ddi_pathname(dip, pathbuf);
1649 	bpc.bpc_dip = dip;
1650 	bpc.bpc_path = pathbuf;
1651 	bpc.bpc_comp = comp;
1652 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1653 	bpc.bpc_nlevel = level;
1654 	pspm.pspm_direction = direction;
1655 	pspm.pspm_errnop = retp;
1656 	pspm.pspm_canblock = canblock;
1657 	pspm.pspm_scan = scan;
1658 	bpc.bpc_private = &pspm;
1659 
1660 	/*
1661 	 * If a config operation is being done (we've locked the parent) or
1662 	 * we already hold the power lock (we've locked the node), then we
1663 	 * can operate directly on the node because we have already brought
1664 	 * up all the ancestors; otherwise, we have to go back to the top
1665 	 * of the tree.
1666 	 */
1667 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1668 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1669 		    (void *)&bpc, (void *)&unused);
1670 	else
1671 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1672 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1673 #ifdef DEBUG
1674 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1675 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1676 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1677 	}
1678 	if (diverted) {
1679 		mutex_enter(&pm_debug_lock);
1680 		pm_divertdebug--;
1681 		mutex_exit(&pm_debug_lock);
1682 	}
1683 #endif
1684 	kmem_free(pathbuf, MAXPATHLEN);
1685 	return (ret);
1686 }
1687 
1688 /*
1689  * If holddip is set, then if a dip is found we return with the node held.
1690  *
1691  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1692  * (resolve_pathname), but it does not drive attach.
1693  */
1694 dev_info_t *
1695 pm_name_to_dip(char *pathname, int holddip)
1696 {
1697 	struct pathname pn;
1698 	char		*component;
1699 	dev_info_t	*parent, *child;
1700 	int		circ;
1701 
1702 	if ((pathname == NULL) || (*pathname != '/'))
1703 		return (NULL);
1704 
1705 	/* setup pathname and allocate component */
1706 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1707 		return (NULL);
1708 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1709 
1710 	/* start at top, process '/' component */
1711 	parent = child = ddi_root_node();
1712 	ndi_hold_devi(parent);
1713 	pn_skipslash(&pn);
1714 	ASSERT(i_ddi_devi_attached(parent));
1715 
1716 	/* process components of pathname */
1717 	while (pn_pathleft(&pn)) {
1718 		(void) pn_getcomponent(&pn, component);
1719 
1720 		/* enter parent and search for component child */
1721 		ndi_devi_enter(parent, &circ);
1722 		child = ndi_devi_findchild(parent, component);
1723 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1724 			child = NULL;
1725 			ndi_devi_exit(parent, circ);
1726 			ndi_rele_devi(parent);
1727 			goto out;
1728 		}
1729 
1730 		/* attached child found, hold child and release parent */
1731 		ndi_hold_devi(child);
1732 		ndi_devi_exit(parent, circ);
1733 		ndi_rele_devi(parent);
1734 
1735 		/* child becomes parent, and process next component */
1736 		parent = child;
1737 		pn_skipslash(&pn);
1738 
1739 		/* loop with active ndi_devi_hold of child->parent */
1740 	}
1741 
1742 out:
1743 	pn_free(&pn);
1744 	kmem_free(component, MAXNAMELEN);
1745 
1746 	/* if we are not asked to return with hold, drop current hold */
1747 	if (child && !holddip)
1748 		ndi_rele_devi(child);
1749 	return (child);
1750 }
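
/*
 * Illustrative sketch (not part of the original source): how a caller
 * might resolve a physical device path to a held devinfo node and drop
 * the hold when done.  The path string is hypothetical; ddi_release_devi()
 * is the release routine this file pairs with pm_name_to_dip() holds.
 */
#if 0	/* example only, never compiled */
	dev_info_t *dip;

	if ((dip = pm_name_to_dip("/pci@1f,0/ide@d/dad@0,0", 1)) != NULL) {
		/* ... use dip while the hold keeps it from going away ... */
		ddi_release_devi(dip);
	}
#endif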
1751 
1752 /*
1753  * Search for a dependency and mark it unsatisfied
1754  */
1755 static void
1756 pm_unsatisfy(char *keeper, char *kept)
1757 {
1758 	PMD_FUNC(pmf, "unsatisfy")
1759 	pm_pdr_t *dp;
1760 
1761 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1762 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1763 		if (!dp->pdr_isprop) {
1764 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1765 			    (dp->pdr_kept_count > 0) &&
1766 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1767 				if (dp->pdr_satisfied) {
1768 					dp->pdr_satisfied = 0;
1769 					pm_unresolved_deps++;
1770 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1771 					    "pm_unresolved_deps now %d\n", pmf,
1772 					    pm_unresolved_deps))
1773 				}
1774 			}
1775 		}
1776 	}
1777 }
1778 
1779 /*
1780  * Device dip is being un-power-managed; it keeps up "count" other devices.
1781  * We need to release any hold we have on the kept devices, and also
1782  * mark the dependency no longer satisfied.
1783  */
1784 static void
1785 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1786 {
1787 	PMD_FUNC(pmf, "unkeeps")
1788 	int i, j;
1789 	dev_info_t *kept;
1790 	dev_info_t *dip;
1791 	struct pm_component *cp;
1792 	int keeper_on = 0, circ;
1793 
1794 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1795 	    keeper, (void *)keptpaths))
1796 	/*
1797 	 * Try to grab the keeper.  The keeper may have gone away by now;
1798 	 * in that case, use the passed-in value pwr.
1799 	 */
1800 	dip = pm_name_to_dip(keeper, 1);
1801 	for (i = 0; i < count; i++) {
1802 		/* Release power hold */
1803 		kept = pm_name_to_dip(keptpaths[i], 1);
1804 		if (kept) {
1805 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1806 			    PM_DEVICE(kept), i))
1807 			/*
1808 			 * We need to check if we skipped a bringup here
1809 			 * because we could have failed the bringup
1810 			 * (i.e. a DIRECT PM device) and so have
1811 			 * not incremented the count.
1812 			 */
1813 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1814 				keeper_on = 0;
1815 				PM_LOCK_POWER(dip, &circ);
1816 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1817 					cp = &DEVI(dip)->devi_pm_components[j];
1818 					if (cur_power(cp)) {
1819 						keeper_on++;
1820 						break;
1821 					}
1822 				}
1823 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1824 					pm_rele_power(kept);
1825 					DEVI(kept)->devi_pm_flags
1826 					    &= ~PMC_SKIP_BRINGUP;
1827 				}
1828 				PM_UNLOCK_POWER(dip, circ);
1829 			} else if (pwr) {
1830 				if (PM_SKBU(kept) == 0) {
1831 					pm_rele_power(kept);
1832 					DEVI(kept)->devi_pm_flags
1833 					    &= ~PMC_SKIP_BRINGUP;
1834 				}
1835 			}
1836 			ddi_release_devi(kept);
1837 		}
1838 		/*
1839 		 * mark this dependency not satisfied
1840 		 */
1841 		pm_unsatisfy(keeper, keptpaths[i]);
1842 	}
1843 	if (dip)
1844 		ddi_release_devi(dip);
1845 }
1846 
1847 /*
1848  * Device kept is being un-power-managed; it is kept up by keeper.
1849  * We need to mark the dependency no longer satisfied.
1850  */
1851 static void
1852 pm_unkepts(char *kept, char *keeper)
1853 {
1854 	PMD_FUNC(pmf, "unkepts")
1855 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1856 	ASSERT(keeper != NULL);
1857 	/*
1858 	 * mark this dependency not satisfied
1859 	 */
1860 	pm_unsatisfy(keeper, kept);
1861 }
1862 
1863 /*
1864  * Removes dependency information and hold on the kepts, if the path is a
1865  * path of a keeper.
1866  */
1867 static void
1868 pm_free_keeper(char *path, int pwr)
1869 {
1870 	pm_pdr_t *dp;
1871 	int i;
1872 	size_t length;
1873 
1874 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1875 		if (strcmp(dp->pdr_keeper, path) != 0)
1876 			continue;
1877 		/*
1878 		 * Remove all our kept holds and the dependency records,
1879 		 * then free up the kept lists.
1880 		 */
1881 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1882 		if (dp->pdr_kept_count)  {
1883 			for (i = 0; i < dp->pdr_kept_count; i++) {
1884 				length = strlen(dp->pdr_kept_paths[i]);
1885 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1886 			}
1887 			kmem_free(dp->pdr_kept_paths,
1888 			    dp->pdr_kept_count * sizeof (char **));
1889 			dp->pdr_kept_paths = NULL;
1890 			dp->pdr_kept_count = 0;
1891 		}
1892 	}
1893 }
1894 
1895 /*
1896  * Removes the device represented by path from the list of kepts, if the
1897  * path is a path of a kept.
1898  */
1899 static void
1900 pm_free_kept(char *path)
1901 {
1902 	pm_pdr_t *dp;
1903 	int i;
1904 	int j, count;
1905 	size_t length;
1906 	char **paths;
1907 
1908 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1909 		if (dp->pdr_kept_count == 0)
1910 			continue;
1911 		count = dp->pdr_kept_count;
1912 		/* Remove this device from the kept path lists */
1913 		for (i = 0; i < count; i++) {
1914 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1915 				pm_unkepts(path, dp->pdr_keeper);
1916 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1917 				kmem_free(dp->pdr_kept_paths[i], length);
1918 				dp->pdr_kept_paths[i] = NULL;
1919 				dp->pdr_kept_count--;
1920 			}
1921 		}
1922 		/* Compact the kept paths array */
1923 		if (dp->pdr_kept_count) {
1924 			length = dp->pdr_kept_count * sizeof (char **);
1925 			paths = kmem_zalloc(length, KM_SLEEP);
1926 			j = 0;
1927 			for (i = 0; i < count; i++) {
1928 				if (dp->pdr_kept_paths[i] != NULL) {
1929 					paths[j] = dp->pdr_kept_paths[i];
1930 					j++;
1931 				}
1932 			}
1933 			ASSERT(j == dp->pdr_kept_count);
1934 		}
1935 		/* Now free the old array and point to the new one */
1936 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1937 		if (dp->pdr_kept_count)
1938 			dp->pdr_kept_paths = paths;
1939 		else
1940 			dp->pdr_kept_paths = NULL;
1941 	}
1942 }
1943 
1944 /*
1945  * Free the dependency information for a device.
1946  */
1947 void
1948 pm_free_keeps(char *path, int pwr)
1949 {
1950 	PMD_FUNC(pmf, "free_keeps")
1951 
1952 #ifdef DEBUG
1953 	int doprdeps = 0;
1954 	void prdeps(char *);
1955 
1956 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
1957 	if (pm_debug & PMD_KEEPS) {
1958 		doprdeps = 1;
1959 		prdeps("pm_free_keeps before");
1960 	}
1961 #endif
1962 	/*
1963 	 * First assume we are a keeper and remove all our kepts.
1964 	 */
1965 	pm_free_keeper(path, pwr);
1966 	/*
1967 	 * Now assume we are a kept device, and remove all our records.
1968 	 */
1969 	pm_free_kept(path);
1970 #ifdef	DEBUG
1971 	if (doprdeps) {
1972 		prdeps("pm_free_keeps after");
1973 	}
1974 #endif
1975 }
1976 
1977 static int
1978 pm_is_kept(char *path)
1979 {
1980 	pm_pdr_t *dp;
1981 	int i;
1982 
1983 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1984 		if (dp->pdr_kept_count == 0)
1985 			continue;
1986 		for (i = 0; i < dp->pdr_kept_count; i++) {
1987 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
1988 				return (1);
1989 		}
1990 	}
1991 	return (0);
1992 }
1993 
1994 static void
1995 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
1996 {
1997 	PMD_FUNC(pmf, "hold_rele_power")
1998 	int circ;
1999 
2000 	if ((dip == NULL) ||
2001 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2002 		return;
2003 
2004 	PM_LOCK_POWER(dip, &circ);
2005 	ASSERT((cnt >= 0 && PM_KUC(dip) >= 0) || (cnt < 0 && PM_KUC(dip) > 0));
2006 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2007 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2008 
2009 	PM_KUC(dip) += cnt;
2010 
2011 	ASSERT(PM_KUC(dip) >= 0);
2012 	PM_UNLOCK_POWER(dip, circ);
2013 
2014 	if (cnt < 0 && PM_KUC(dip) == 0)
2015 		pm_rescan(dip);
2016 }
2017 
2018 #define	MAX_PPM_HANDLERS	4
2019 
2020 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2021 
2022 struct	ppm_callbacks {
2023 	int (*ppmc_func)(dev_info_t *);
2024 	dev_info_t	*ppmc_dip;
2025 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2026 
2027 
2028 /*
2029  * This routine calls into all the registered ppms to notify them
2030  * that either all components of power-managed devices are at their
2031  * lowest levels or no longer all are at their lowest levels.
2032  */
2033 static void
2034 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2035 {
2036 	struct ppm_callbacks *ppmcp;
2037 	power_req_t power_req;
2038 	int result = 0;
2039 
2040 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2041 	power_req.req.ppm_all_lowest_req.mode = mode;
2042 	mutex_enter(&ppm_lock);
2043 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2044 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2045 		    DDI_CTLOPS_POWER, &power_req, &result);
2046 	mutex_exit(&ppm_lock);
2047 }
2048 
2049 static void
2050 pm_set_pm_info(dev_info_t *dip, void *value)
2051 {
2052 	DEVI(dip)->devi_pm_info = value;
2053 }
2054 
2055 pm_rsvp_t *pm_blocked_list;
2056 
2057 /*
2058  * Look up an entry in the blocked list by dip and component
2059  */
2060 static pm_rsvp_t *
2061 pm_rsvp_lookup(dev_info_t *dip, int comp)
2062 {
2063 	pm_rsvp_t *p;
2064 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2065 	for (p = pm_blocked_list; p; p = p->pr_next)
2066 		if (p->pr_dip == dip && p->pr_comp == comp) {
2067 			return (p);
2068 		}
2069 	return (NULL);
2070 }
2071 
2072 /*
2073  * Called when a device which is direct power managed (or the parent or
2074  * dependent of such a device) changes power, or when a pm clone is closed
2075  * that was direct power managing a device.  This call results in pm_blocked()
2076  * (below) returning.
2077  */
2078 void
2079 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2080 {
2081 	PMD_FUNC(pmf, "proceed")
2082 	pm_rsvp_t *found = NULL;
2083 	pm_rsvp_t *p;
2084 
2085 	mutex_enter(&pm_rsvp_lock);
2086 	switch (cmd) {
2087 	/*
2088 	 * we're giving up control, let any pending op continue
2089 	 */
2090 	case PMP_RELEASE:
2091 		for (p = pm_blocked_list; p; p = p->pr_next) {
2092 			if (dip == p->pr_dip) {
2093 				p->pr_retval = PMP_RELEASE;
2094 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2095 				    pmf, PM_DEVICE(dip)))
2096 				cv_signal(&p->pr_cv);
2097 			}
2098 		}
2099 		break;
2100 
2101 	/*
2102 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2103 	 * succeed and a non-matching request for the same device fail
2104 	 */
2105 	case PMP_SETPOWER:
2106 		found = pm_rsvp_lookup(dip, comp);
2107 		if (!found)	/* if driver not waiting */
2108 			break;
2109 		/*
2110 		 * This cannot be pm_lower_power, since that can only happen
2111 		 * during detach or probe
2112 		 */
2113 		if (found->pr_newlevel <= newlevel) {
2114 			found->pr_retval = PMP_SUCCEED;
2115 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2116 			    PM_DEVICE(dip)))
2117 		} else {
2118 			found->pr_retval = PMP_FAIL;
2119 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2120 			    PM_DEVICE(dip)))
2121 		}
2122 		cv_signal(&found->pr_cv);
2123 		break;
2124 
2125 	default:
2126 		panic("pm_proceed unknown cmd %d", cmd);
2127 	}
2128 	mutex_exit(&pm_rsvp_lock);
2129 }
2130 
2131 /*
2132  * This routine dispatches new work to the dependency thread. Caller must
2133  * be prepared to block for memory if necessary.
2134  */
2135 void
2136 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2137     int *res, int cached_pwr)
2138 {
2139 	pm_dep_wk_t	*new_work;
2140 
2141 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2142 	new_work->pdw_type = cmd;
2143 	new_work->pdw_wait = wait;
2144 	new_work->pdw_done = 0;
2145 	new_work->pdw_ret = 0;
2146 	new_work->pdw_pwr = cached_pwr;
2147 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2148 	if (keeper != NULL) {
2149 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2150 		    KM_SLEEP);
2151 		(void) strcpy(new_work->pdw_keeper, keeper);
2152 	}
2153 	if (kept != NULL) {
2154 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2155 		(void) strcpy(new_work->pdw_kept, kept);
2156 	}
2157 	mutex_enter(&pm_dep_thread_lock);
2158 	if (pm_dep_thread_workq == NULL) {
2159 		pm_dep_thread_workq = new_work;
2160 		pm_dep_thread_tail = new_work;
2161 		new_work->pdw_next = NULL;
2162 	} else {
2163 		pm_dep_thread_tail->pdw_next = new_work;
2164 		pm_dep_thread_tail = new_work;
2165 		new_work->pdw_next = NULL;
2166 	}
2167 	cv_signal(&pm_dep_thread_cv);
2168 	/* If caller asked for it, wait till it is done. */
2169 	if (wait)  {
2170 		while (!new_work->pdw_done)
2171 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2172 		/*
2173 		 * Pass return status, if any, back.
2174 		 */
2175 		if (res != NULL)
2176 			*res = new_work->pdw_ret;
2177 		/*
2178 		 * If we asked to wait, it is our job to free the request
2179 		 * structure.
2180 		 */
2181 		if (new_work->pdw_keeper)
2182 			kmem_free(new_work->pdw_keeper,
2183 			    strlen(new_work->pdw_keeper) + 1);
2184 		if (new_work->pdw_kept)
2185 			kmem_free(new_work->pdw_kept,
2186 			    strlen(new_work->pdw_kept) + 1);
2187 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2188 	}
2189 	mutex_exit(&pm_dep_thread_lock);
2190 }
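
/*
 * Illustrative sketch (not part of the original source): dispatching
 * dependency work asynchronously and synchronously.  PM_DEP_WK_DETACH and
 * PM_DEP_NOWAIT appear elsewhere in this file; the PM_DEP_WAIT variant and
 * the "res"/"path" usage shown here are assumptions based on the wait
 * logic in the function above.
 */
#if 0	/* example only, never compiled */
	int res;

	/* fire and forget: the dependency thread frees the request */
	pm_dispatch_to_dep_thread(PM_DEP_WK_DETACH, path, path,
	    PM_DEP_NOWAIT, NULL, 0);

	/* block until the work is done and collect its return status */
	pm_dispatch_to_dep_thread(PM_DEP_WK_DETACH, path, path,
	    PM_DEP_WAIT, &res, 0);
#endif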
2191 
2192 /*
2193  * Release the pm resource for this device.
2194  */
2195 void
2196 pm_rem_info(dev_info_t *dip)
2197 {
2198 	PMD_FUNC(pmf, "rem_info")
2199 	int		i, count = 0;
2200 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2201 	dev_info_t	*pdip = ddi_get_parent(dip);
2202 	char		*pathbuf;
2203 	int		work_type = PM_DEP_WK_DETACH;
2204 
2205 	ASSERT(info);
2206 
2207 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2208 	if (PM_ISDIRECT(dip)) {
2209 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2210 		ASSERT(info->pmi_clone);
2211 		info->pmi_clone = 0;
2212 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2213 	}
2214 	ASSERT(!PM_GET_PM_SCAN(dip));
2215 
2216 	/*
2217 	 * Now adjust parent's kidsupcnt.  For BC nodes we check only comp 0;
2218 	 * for others we check all components.  A BC node that has already
2219 	 * called pm_destroy_components() has a zero component count.
2220 	 * Parents that get notification are not adjusted because their
2221 	 * kidsupcnt is always 0 (or 1 during configuration).
2222 	 */
2223 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2224 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2225 
2226 	/* node is detached, so we can examine power without locking */
2227 	if (PM_ISBC(dip)) {
2228 		count = (PM_CURPOWER(dip, 0) != 0);
2229 	} else {
2230 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2231 			count += (PM_CURPOWER(dip, i) != 0);
2232 	}
2233 
2234 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2235 		e_pm_hold_rele_power(pdip, -count);
2236 
2237 	/* Schedule a request to clean up dependency records */
2238 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2239 	(void) ddi_pathname(dip, pathbuf);
2240 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2241 	    PM_DEP_NOWAIT, NULL, (count > 0));
2242 	kmem_free(pathbuf, MAXPATHLEN);
2243 
2244 	/*
2245 	 * Adjust the pm_comps_notlowest count since this device is
2246 	 * not being power-managed anymore.
2247 	 */
2248 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2249 		if (PM_CURPOWER(dip, i) != 0)
2250 			PM_DECR_NOTLOWEST(dip);
2251 	}
2252 	/*
2253 	 * Once we clear the info pointer, it looks like it is not power
2254 	 * managed to everybody else.
2255 	 */
2256 	pm_set_pm_info(dip, NULL);
2257 	kmem_free(info, sizeof (pm_info_t));
2258 }
2259 
2260 int
2261 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2262 {
2263 	int components = PM_NUMCMPTS(dip);
2264 	int *bufp;
2265 	size_t size;
2266 	int i;
2267 
2268 	if (components <= 0) {
2269 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2270 		    "can't get normal power values\n", PM_DEVICE(dip));
2271 		return (DDI_FAILURE);
2272 	} else {
2273 		size = components * sizeof (int);
2274 		bufp = kmem_alloc(size, KM_SLEEP);
2275 		for (i = 0; i < components; i++) {
2276 			bufp[i] = pm_get_normal_power(dip, i);
2277 		}
2278 	}
2279 	*length = size;
2280 	*valuep = bufp;
2281 	return (DDI_SUCCESS);
2282 }
2283 
2284 static int
2285 pm_reset_timestamps(dev_info_t *dip, void *arg)
2286 {
2287 	_NOTE(ARGUNUSED(arg))
2288 
2289 	int components;
2290 	int	i;
2291 
2292 	if (!PM_GET_PM_INFO(dip))
2293 		return (DDI_WALK_CONTINUE);
2294 	components = PM_NUMCMPTS(dip);
2295 	ASSERT(components > 0);
2296 	PM_LOCK_BUSY(dip);
2297 	for (i = 0; i < components; i++) {
2298 		struct pm_component *cp;
2299 		/*
2300 		 * If the component was not marked as busy,
2301 		 * reset its timestamp to now.
2302 		 */
2303 		cp = PM_CP(dip, i);
2304 		if (cp->pmc_timestamp)
2305 			cp->pmc_timestamp = gethrestime_sec();
2306 	}
2307 	PM_UNLOCK_BUSY(dip);
2308 	return (DDI_WALK_CONTINUE);
2309 }
2310 
2311 /*
2312  * Convert a power level to an index into the levels array (or
2313  * just PM_LEVEL_UNKNOWN in that special case).
2314  */
2315 static int
2316 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2317 {
2318 	PMD_FUNC(pmf, "level_to_index")
2319 	int i;
2320 	int limit = cp->pmc_comp.pmc_numlevels;
2321 	int *ip = cp->pmc_comp.pmc_lvals;
2322 
2323 	if (level == PM_LEVEL_UNKNOWN)
2324 		return (level);
2325 
2326 	for (i = 0; i < limit; i++) {
2327 		if (level == *ip++) {
2328 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2329 			    pmf, PM_DEVICE(dip),
2330 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2331 			return (i);
2332 		}
2333 	}
2334 	panic("pm_level_to_index: level %d not found for device "
2335 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2336 	/*NOTREACHED*/
2337 }
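
/*
 * Illustrative sketch (not part of the original source): for a
 * hypothetical component whose pmc_lvals array is {0, 4, 10}, the mapping
 * works out as shown below; an unlisted level panics.
 */
#if 0	/* example only, never compiled */
	int idx;

	idx = pm_level_to_index(dip, cp, 10);	/* returns 2 */
	idx = pm_level_to_index(dip, cp, 0);	/* returns 0 */
	idx = pm_level_to_index(dip, cp, PM_LEVEL_UNKNOWN); /* passthrough */
	idx = pm_level_to_index(dip, cp, 5);	/* would panic: not a level */
#endif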
2338 
2339 /*
2340  * Internal function to set current power level
2341  */
2342 static void
2343 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2344 {
2345 	PMD_FUNC(pmf, "set_cur_pwr")
2346 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2347 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2348 
2349 	/*
2350 	 * Nothing to adjust if current & new levels are the same.
2351 	 */
2352 	if (curpwr != PM_LEVEL_UNKNOWN &&
2353 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2354 		return;
2355 
2356 	/*
2357 	 * Keep the count for comps doing transition to/from lowest
2358 	 * level.
2359 	 */
2360 	if (curpwr == 0) {
2361 		PM_INCR_NOTLOWEST(dip);
2362 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2363 		PM_DECR_NOTLOWEST(dip);
2364 	}
2365 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2366 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2367 }
2368 
2369 /*
2370  * This is the default method of setting the power of a device if no ppm
2371  * driver has claimed it.
2372  */
2373 int
2374 pm_power(dev_info_t *dip, int comp, int level)
2375 {
2376 	PMD_FUNC(pmf, "power")
2377 	struct dev_ops	*ops;
2378 	int		(*fn)(dev_info_t *, int, int);
2379 	struct pm_component *cp = PM_CP(dip, comp);
2380 	int retval;
2381 	pm_info_t *info = PM_GET_PM_INFO(dip);
2382 	static int pm_phc_impl(dev_info_t *, int, int, int);
2383 
2384 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2385 	    PM_DEVICE(dip), comp, level))
2386 	if (!(ops = ddi_get_driver(dip))) {
2387 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2388 		    PM_DEVICE(dip)))
2389 		return (DDI_FAILURE);
2390 	}
2391 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2392 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2393 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2394 		    (!fn ? " devo_power NULL" : "")))
2395 		return (DDI_FAILURE);
2396 	}
2397 	cp->pmc_flags |= PM_POWER_OP;
2398 	retval = (*fn)(dip, comp, level);
2399 	cp->pmc_flags &= ~PM_POWER_OP;
2400 	if (retval == DDI_SUCCESS) {
2401 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2402 		return (DDI_SUCCESS);
2403 	}
2404 
2405 	/*
2406 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2407 	 * updated only the power level of the component.  If our attempt
2408 	 * above to set the device to a new power level has failed, we sync the
2409 	 * total power state via phc code now.
2410 	 */
2411 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2412 		int phc_lvl =
2413 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2414 
2415 		ASSERT(info);
2416 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2417 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2418 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2419 	}
2420 
2421 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2422 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2423 	    level, power_val_to_string(cp, level)));
2424 	return (DDI_FAILURE);
2425 }
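
/*
 * Illustrative sketch (not part of the original source): the devo_power
 * entry point invoked above is the driver's power(9E) routine.  A minimal
 * single-component implementation might look like this; the "xx" names,
 * soft-state handle, and hardware accessor are hypothetical.
 */
#if 0	/* example only, never compiled */
static int
xx_power(dev_info_t *dip, int component, int level)
{
	struct xx_softc *sc = ddi_get_soft_state(xx_state,
	    ddi_get_instance(dip));

	if (sc == NULL || component != 0)
		return (DDI_FAILURE);
	xx_set_hw_power(sc, level);	/* hypothetical hardware access */
	return (DDI_SUCCESS);
}
#endif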
2426 
2427 int
2428 pm_unmanage(dev_info_t *dip)
2429 {
2430 	PMD_FUNC(pmf, "unmanage")
2431 	power_req_t power_req;
2432 	int result, retval = 0;
2433 
2434 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2435 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2436 	    PM_DEVICE(dip)))
2437 	power_req.request_type = PMR_PPM_UNMANAGE;
2438 	power_req.req.ppm_config_req.who = dip;
2439 	if (pm_ppm_claimed(dip))
2440 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2441 		    &power_req, &result);
2442 #ifdef DEBUG
2443 	else
2444 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2445 		    &power_req, &result);
2446 #endif
2447 	ASSERT(retval == DDI_SUCCESS);
2448 	pm_rem_info(dip);
2449 	return (retval);
2450 }
2451 
2452 int
2453 pm_raise_power(dev_info_t *dip, int comp, int level)
2454 {
2455 	if (level < 0)
2456 		return (DDI_FAILURE);
2457 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2458 	    !e_pm_valid_power(dip, comp, level))
2459 		return (DDI_FAILURE);
2460 
2461 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2462 }
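
/*
 * Illustrative sketch (not part of the original source): the usual driver
 * pattern around pm_raise_power(9F), bracketed by the pm_busy_component(9F)
 * and pm_idle_component(9F) interfaces.  Component 0 and level 1 are
 * hypothetical values from the device's pm-components(9P) property.
 */
#if 0	/* example only, never compiled */
	(void) pm_busy_component(dip, 0);
	if (pm_raise_power(dip, 0, 1) != DDI_SUCCESS) {
		(void) pm_idle_component(dip, 0);
		return (EIO);
	}
	/* ... touch the hardware at full power ... */
	(void) pm_idle_component(dip, 0);
#endif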
2463 
2464 int
2465 pm_lower_power(dev_info_t *dip, int comp, int level)
2466 {
2467 	PMD_FUNC(pmf, "pm_lower_power")
2468 
2469 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2470 	    !e_pm_valid_power(dip, comp, level)) {
2471 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2472 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2473 		return (DDI_FAILURE);
2474 	}
2475 
2476 	if (!DEVI_IS_DETACHING(dip)) {
2477 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2478 		    pmf, PM_DEVICE(dip)))
2479 		return (DDI_FAILURE);
2480 	}
2481 
2482 	/*
2483 	 * If we don't care about saving power, or we're treating this node
2484 	 * specially, then this is a no-op
2485 	 */
2486 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2487 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2488 		    pmf, PM_DEVICE(dip),
2489 		    !autopm_enabled ? "!autopm_enabled " : "",
2490 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2491 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2492 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2493 		return (DDI_SUCCESS);
2494 	}
2495 
2496 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2497 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2498 		    PM_DEVICE(dip)))
2499 		return (DDI_FAILURE);
2500 	}
2501 	return (DDI_SUCCESS);
2502 }
2503 
2504 /*
2505  * Find the entries struct for a given dip in the direct list; return it locked.
2506  */
2507 static psce_t *
2508 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2509 {
2510 	pscc_t *p;
2511 	psce_t *psce;
2512 
2513 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2514 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2515 		if (p->pscc_dip == dip) {
2516 			*psccp = p;
2517 			psce = p->pscc_entries;
2518 			ASSERT(psce);
2519 			mutex_enter(&psce->psce_lock);
2520 			rw_exit(&pm_pscc_direct_rwlock);
2521 			return (psce);
2522 		}
2523 	}
2524 	rw_exit(&pm_pscc_direct_rwlock);
2525 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2526 	/*NOTREACHED*/
2527 }
2528 
2529 /*
2530  * Write an entry indicating a power level change (to be passed to a process
2531  * later) in the given psce.
2532  * If we were called in the path that brings up the console fb in the
2533  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2534  * we create a record that has a size of -1, a physpath of NULL, and that
2535  * has the overflow flag set.
2536  */
2537 static int
2538 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2539     int old, int which, pm_canblock_t canblock)
2540 {
2541 	char	buf[MAXNAMELEN];
2542 	pm_state_change_t *p;
2543 	size_t	size;
2544 	caddr_t physpath = NULL;
2545 	int	overrun = 0;
2546 
2547 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2548 	(void) ddi_pathname(dip, buf);
2549 	size = strlen(buf) + 1;
2550 	p = psce->psce_in;
2551 	if (canblock == PM_CANBLOCK_BYPASS) {
2552 		physpath = kmem_alloc(size, KM_NOSLEEP);
2553 		if (physpath == NULL) {
2554 			/*
2555 			 * mark current entry as overrun
2556 			 */
2557 			p->flags |= PSC_EVENT_LOST;
2558 			size = (size_t)-1;
2559 		}
2560 	} else
2561 		physpath = kmem_alloc(size, KM_SLEEP);
2562 	if (p->size) {	/* overflow; mark the next entry */
2563 		if (p->size != (size_t)-1)
2564 			kmem_free(p->physpath, p->size);
2565 		ASSERT(psce->psce_out == p);
2566 		if (p == psce->psce_last) {
2567 			psce->psce_first->flags |= PSC_EVENT_LOST;
2568 			psce->psce_out = psce->psce_first;
2569 		} else {
2570 			(p + 1)->flags |= PSC_EVENT_LOST;
2571 			psce->psce_out = (p + 1);
2572 		}
2573 		overrun++;
2574 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2575 		p->flags |= PSC_EVENT_LOST;
2576 		p->size = 0;
2577 		p->physpath = NULL;
2578 	}
2579 	if (which == PSC_INTEREST) {
2580 		mutex_enter(&pm_compcnt_lock);
2581 		if (pm_comps_notlowest == 0)
2582 			p->flags |= PSC_ALL_LOWEST;
2583 		else
2584 			p->flags &= ~PSC_ALL_LOWEST;
2585 		mutex_exit(&pm_compcnt_lock);
2586 	}
2587 	p->event = event;
2588 	p->timestamp = gethrestime_sec();
2589 	p->component = comp;
2590 	p->old_level = old;
2591 	p->new_level = new;
2592 	p->physpath = physpath;
2593 	p->size = size;
2594 	if (physpath != NULL)
2595 		(void) strcpy(p->physpath, buf);
2596 	if (p == psce->psce_last)
2597 		psce->psce_in = psce->psce_first;
2598 	else
2599 		psce->psce_in = ++p;
2600 	mutex_exit(&psce->psce_lock);
2601 	return (overrun);
2602 }
2603 
2604 /*
2605  * Find the next entry on the interest list.  We keep a pointer to the item we
2606  * last returned in the user's cookie.  Returns a locked entries struct.
2607  */
2608 static psce_t *
2609 psc_interest(void **cookie, pscc_t **psccp)
2610 {
2611 	pscc_t *pscc;
2612 	pscc_t **cookiep = (pscc_t **)cookie;
2613 
2614 	if (*cookiep == NULL)
2615 		pscc = pm_pscc_interest;
2616 	else
2617 		pscc = (*cookiep)->pscc_next;
2618 	if (pscc) {
2619 		*cookiep = pscc;
2620 		*psccp = pscc;
2621 		mutex_enter(&pscc->pscc_entries->psce_lock);
2622 		return (pscc->pscc_entries);
2623 	} else {
2624 		return (NULL);
2625 	}
2626 }
2627 
2628 /*
2629  * Create an entry for a process to pick up indicating a power level change.
2630  */
2631 static void
2632 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2633     int newlevel, int oldlevel, pm_canblock_t canblock)
2634 {
2635 	PMD_FUNC(pmf, "enqueue_notify")
2636 	pscc_t	*pscc;
2637 	psce_t	*psce;
2638 	void		*cookie = NULL;
2639 	int	overrun;
2640 
2641 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2642 	switch (cmd) {
2643 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2644 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2645 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2646 		psce = pm_psc_dip_to_direct(dip, &pscc);
2647 		ASSERT(psce);
2648 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2649 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2650 		    pm_poll_cnt[pscc->pscc_clone]))
2651 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2652 		    PSC_DIRECT, canblock);
2653 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2654 		mutex_enter(&pm_clone_lock);
2655 		if (!overrun)
2656 			pm_poll_cnt[pscc->pscc_clone]++;
2657 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2658 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2659 		mutex_exit(&pm_clone_lock);
2660 		break;
2661 	case PSC_HAS_CHANGED:
2662 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2663 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2664 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2665 			psce = pm_psc_dip_to_direct(dip, &pscc);
2666 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2667 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2668 			    pm_poll_cnt[pscc->pscc_clone]))
2669 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2670 			    oldlevel, PSC_DIRECT, canblock);
2671 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2672 			mutex_enter(&pm_clone_lock);
2673 			if (!overrun)
2674 				pm_poll_cnt[pscc->pscc_clone]++;
2675 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2676 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2677 			mutex_exit(&pm_clone_lock);
2678 		}
2679 		mutex_enter(&pm_clone_lock);
2680 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2681 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2682 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2683 			    oldlevel, PSC_INTEREST, canblock);
2684 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2685 		}
2686 		rw_exit(&pm_pscc_interest_rwlock);
2687 		mutex_exit(&pm_clone_lock);
2688 		break;
2689 #ifdef DEBUG
2690 	default:
2691 		ASSERT(0);
2692 #endif
2693 	}
2694 }
2695 
2696 static void
2697 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2698 {
2699 	if (listp) {
2700 		pm_ppm_devlist_t *p, *next = NULL;
2701 
2702 		for (p = *listp; p; p = next) {
2703 			next = p->ppd_next;
2704 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2705 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2706 			    canblock);
2707 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2708 		}
2709 		*listp = NULL;
2710 	}
2711 }
2712 
2713 /*
2714  * Try to get the power locks of the parent node and target (child)
2715  * node.  Return true if successful (with both locks held) or false
2716  * (with no locks held).
2717  */
2718 static int
2719 pm_try_parent_child_locks(dev_info_t *pdip,
2720     dev_info_t *dip, int *pcircp, int *circp)
2721 {
2722 	if (ndi_devi_tryenter(pdip, pcircp))
2723 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2724 			return (1);
2725 		} else {
2726 			ndi_devi_exit(pdip, *pcircp);
2727 		}
2728 	return (0);
2729 }
2730 
2731 /*
2732  * Determine if the power lock owner is blocked by the current thread.
2733  * Returns:
2734  * 	1 - If the thread owning the effective power lock (the first lock on
2735  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2736  *          a mutex held by the current thread.
2737  *
2738  *	0 - otherwise
2739  *
2740  * Note : This function is called by pm_power_has_changed to determine whether
2741  * it is executing in parallel with pm_set_power.
2742  */
2743 static int
2744 pm_blocked_by_us(dev_info_t *dip)
2745 {
2746 	power_req_t power_req;
2747 	kthread_t *owner;
2748 	int result;
2749 	kmutex_t *mp;
2750 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2751 
2752 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2753 	power_req.req.ppm_power_lock_owner_req.who = dip;
2754 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2755 	    DDI_SUCCESS) {
2756 		/*
2757 		 * It is assumed that if the device is claimed by ppm, ppm
2758 		 * will always implement this request type and it'll always
2759 		 * return success. We panic here, if it fails.
2760 		 */
2761 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2762 		    PM_DEVICE(dip));
2763 		/*NOTREACHED*/
2764 	}
2765 
2766 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2767 	    owner->t_state == TS_SLEEP &&
2768 	    owner->t_sobj_ops &&
2769 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2770 	    (mp = (kmutex_t *)owner->t_wchan) &&
2771 	    mutex_owner(mp) == curthread)
2772 		return (1);
2773 
2774 	return (0);
2775 }
2776 
2777 /*
2778  * Notify parent which wants to hear about a child's power changes.
2779  */
2780 static void
2781 pm_notify_parent(dev_info_t *dip,
2782     dev_info_t *pdip, int comp, int old_level, int level)
2783 {
2784 	pm_bp_has_changed_t bphc;
2785 	pm_sp_misc_t pspm;
2786 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2787 	int result = DDI_SUCCESS;
2788 
2789 	bphc.bphc_dip = dip;
2790 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2791 	bphc.bphc_comp = comp;
2792 	bphc.bphc_olevel = old_level;
2793 	bphc.bphc_nlevel = level;
2794 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2795 	pspm.pspm_scan = 0;
2796 	bphc.bphc_private = &pspm;
2797 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2798 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2799 	kmem_free(pathbuf, MAXPATHLEN);
2800 }
2801 
2802 /*
2803  * Check if we need to resume a BC device, and make the attach call as required.
2804  */
2805 static int
2806 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2807 {
2808 	int ret = DDI_SUCCESS;
2809 
2810 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2811 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2812 		/* ppm is not interested in DDI_PM_RESUME */
2813 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2814 			/* XXX Should we mark it resumed, */
2815 			/* even though it failed? */
2816 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2817 			    PM_NAME(dip), PM_ADDR(dip));
2818 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2819 	}
2820 
2821 	return (ret);
2822 }
2823 
2824 /*
2825  * Tests outside the lock to see if we should bother to enqueue an entry
2826  * for any watching process.  If yes, then caller will take the lock and
2827  * do the full protocol.
2828  */
2829 static int
2830 pm_watchers()
2831 {
2832 	if (pm_processes_stopped)
2833 		return (0);
2834 	return (pm_pscc_direct || pm_pscc_interest);
2835 }
2836 
2837 /*
2838  * A driver is reporting that the power of one of its device's components
2839  * has changed.  Update the power state accordingly.
2840  */
2841 int
2842 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2843 {
2844 	PMD_FUNC(pmf, "pm_power_has_changed")
2845 	int ret;
2846 	dev_info_t *pdip = ddi_get_parent(dip);
2847 	struct pm_component *cp;
2848 	int blocked, circ, pcirc, old_level;
2849 	static int pm_phc_impl(dev_info_t *, int, int, int);
2850 
2851 	if (level < 0) {
2852 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2853 		    PM_DEVICE(dip), level))
2854 		return (DDI_FAILURE);
2855 	}
2856 
2857 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2858 	    PM_DEVICE(dip), comp, level))
2859 
2860 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2861 	    !e_pm_valid_power(dip, comp, level))
2862 		return (DDI_FAILURE);
2863 
2864 	/*
2865 	 * A driver thread calling pm_power_has_changed and another thread
2866 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2867 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2868 	 * this specific deadlock.  If we can't get the lock immediately
2869 	 * and we are deadlocked, just update the component's level, do
2870 	 * notifications, and return.  We intend to update the total power
2871 	 * state later (if the other thread fails to set power to the
2872 	 * desired level).  If we were called because of a power change on a
2873 	 * component that isn't involved in a set_power op, update all state
2874 	 * immediately.
2875 	 */
2876 	cp = PM_CP(dip, comp);
2877 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2878 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2879 		    (cp->pmc_flags & PM_POWER_OP)) {
2880 			if (pm_watchers()) {
2881 				mutex_enter(&pm_rsvp_lock);
2882 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2883 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2884 				mutex_exit(&pm_rsvp_lock);
2885 			}
2886 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2887 				pm_notify_parent(dip,
2888 				    pdip, comp, cur_power(cp), level);
2889 			(void) pm_check_and_resume(dip,
2890 			    comp, cur_power(cp), level);
2891 
2892 			/*
2893 			 * Stash the old power index, update curpwr, and flag
2894 			 * that the total power state needs to be synched.
2895 			 */
2896 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2897 			/*
2898 			 * Several pm_power_has_changed calls could arrive
2899 			 * while the set power path remains blocked.  Keep the
2900 			 * oldest old power and the newest new power of any
2901 			 * sequence of phc calls which arrive during deadlock.
2902 			 */
2903 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2904 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2905 			cp->pmc_cur_pwr =
2906 			    pm_level_to_index(dip, cp, level);
2907 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2908 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2909 			return (DDI_SUCCESS);
2910 		} else
2911 			if (blocked) {	/* blocked, but different cmpt? */
2912 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
2913 					cmn_err(CE_NOTE,
2914 					    "!pm: parent kuc not updated due "
2915 					    "to possible deadlock.\n");
2916 					return (pm_phc_impl(dip,
2917 					    comp, level, 1));
2918 				}
2919 				old_level = cur_power(cp);
2920 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2921 				    (!PM_ISBC(dip) || comp == 0) &&
2922 				    POWERING_ON(old_level, level))
2923 					pm_hold_power(pdip);
2924 				ret = pm_phc_impl(dip, comp, level, 1);
2925 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2926 					if ((!PM_ISBC(dip) ||
2927 					    comp == 0) && level == 0 &&
2928 					    old_level != PM_LEVEL_UNKNOWN)
2929 						pm_rele_power(pdip);
2930 				}
2931 				ndi_devi_exit(pdip, pcirc);
2932 				/* child lock not held: deadlock */
2933 				return (ret);
2934 			}
2935 		delay(1);
2936 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
2937 	}
2938 
2939 	/* non-deadlock case */
2940 	old_level = cur_power(cp);
2941 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2942 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
2943 		pm_hold_power(pdip);
2944 	ret = pm_phc_impl(dip, comp, level, 1);
2945 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2946 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
2947 		    old_level != PM_LEVEL_UNKNOWN)
2948 			pm_rele_power(pdip);
2949 	}
2950 	PM_UNLOCK_POWER(dip, circ);
2951 	ndi_devi_exit(pdip, pcirc);
2952 	return (ret);
2953 }
2954 
2955 /*
2956  * Account for power changes to a component of the console frame buffer.
2957  * If lowering power from full (or "unknown", which is treated as full),
2958  * we will increment the "components off" count of the fb device.
2959  * Subsequent lowering of the same component doesn't affect the count.  If
2960  * raising a component back to full power, we will decrement the count.
2961  *
2962  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2963  */
2964 static int
2965 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2966 {
2967 	struct pm_component *cp = PM_CP(dip, cmpt);
2968 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2969 	int want_normal = (new == cp->pmc_norm_pwr);
2970 	int incr = 0;
2971 
2972 	if (on && !want_normal)
2973 		incr = 1;
2974 	else if (!on && want_normal)
2975 		incr = -1;
2976 	return (incr);
2977 }
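
/*
 * Illustrative sketch (not part of the original source): assuming a
 * hypothetical component with pmc_norm_pwr == 3, the increment works out
 * as follows.
 */
#if 0	/* example only, never compiled */
	incr = calc_cfb_comps_incr(dip, 0, PM_LEVEL_UNKNOWN, 0); /* 1 */
	incr = calc_cfb_comps_incr(dip, 0, 0, 1);	/* 0: already off */
	incr = calc_cfb_comps_incr(dip, 0, 0, 3);	/* -1: back to full */
#endif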
2978 
2979 /*
2980  * Adjust the count of console frame buffer components < full power.
2981  */
2982 static void
2983 update_comps_off(int incr, dev_info_t *dip)
2984 {
2985 	mutex_enter(&pm_cfb_lock);
2986 	pm_cfb_comps_off += incr;
2987 	ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
2988 	mutex_exit(&pm_cfb_lock);
2989 }
2990 
2991 /*
2992  * Update the power state in the framework (via the ppm).  The 'notify'
2993  * argument tells whether to notify watchers.  Power lock is already held.
2994  */
2995 static int
2996 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
2997 {
2998 	PMD_FUNC(pmf, "phc_impl")
2999 	power_req_t power_req;
3000 	int i, dodeps = 0;
3001 	dev_info_t *pdip = ddi_get_parent(dip);
3002 	int result;
3003 	int old_level;
3004 	struct pm_component *cp;
3005 	int incr = 0;
3006 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3007 	int work_type = 0;
3008 	char *pathbuf;
3009 
3010 	/* Must use "official" power level for this test. */
3011 	cp = PM_CP(dip, comp);
3012 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3013 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3014 	if (old_level != PM_LEVEL_UNKNOWN)
3015 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3016 
3017 	if (level == old_level) {
3018 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3019 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3020 		return (DDI_SUCCESS);
3021 	}
3022 
3023 	/*
3024 	 * Tell ppm about this.
3025 	 */
3026 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3027 	power_req.req.ppm_notify_level_req.who = dip;
3028 	power_req.req.ppm_notify_level_req.cmpt = comp;
3029 	power_req.req.ppm_notify_level_req.new_level = level;
3030 	power_req.req.ppm_notify_level_req.old_level = old_level;
3031 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3032 	    &result) == DDI_FAILURE) {
3033 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3034 		    pmf, PM_DEVICE(dip), level))
3035 		return (DDI_FAILURE);
3036 	}
3037 
3038 	if (PM_IS_CFB(dip)) {
3039 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3040 
3041 		if (incr) {
3042 			update_comps_off(incr, dip);
3043 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3044 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3045 			    comp, old_level, level, pm_cfb_comps_off))
3046 		}
3047 	}
3048 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3049 	result = DDI_SUCCESS;
3050 
3051 	if (notify) {
3052 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3053 			pm_notify_parent(dip, pdip, comp, old_level, level);
3054 		(void) pm_check_and_resume(dip, comp, old_level, level);
3055 	}
3056 
3057 	/*
3058 	 * Decrement the dependency kidsup count if we turn a device
3059 	 * off.
3060 	 */
3061 	if (POWERING_OFF(old_level, level)) {
3062 		dodeps = 1;
3063 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3064 			cp = PM_CP(dip, i);
3065 			if (cur_power(cp)) {
3066 				dodeps = 0;
3067 				break;
3068 			}
3069 		}
3070 		if (dodeps)
3071 			work_type = PM_DEP_WK_POWER_OFF;
3072 	}
3073 
3074 	/*
3075 	 * Increment if we turn it on.  Check to see
3076 	 * if other comps are already on; if so,
3077 	 * don't increment.
3078 	 */
3079 	if (POWERING_ON(old_level, level)) {
3080 		dodeps = 1;
3081 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3082 			cp = PM_CP(dip, i);
3083 			if (comp == i)
3084 				continue;
3085 			/* -1 also treated as 0 in this case */
3086 			if (cur_power(cp) > 0) {
3087 				dodeps = 0;
3088 				break;
3089 			}
3090 		}
3091 		if (dodeps)
3092 			work_type = PM_DEP_WK_POWER_ON;
3093 	}
3094 
3095 	if (dodeps) {
3096 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3097 		(void) ddi_pathname(dip, pathbuf);
3098 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3099 		    PM_DEP_NOWAIT, NULL, 0);
3100 		kmem_free(pathbuf, MAXPATHLEN);
3101 	}
3102 
3103 	if (notify && (level != old_level) && pm_watchers()) {
3104 		mutex_enter(&pm_rsvp_lock);
3105 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3106 		    PM_CANBLOCK_BLOCK);
3107 		mutex_exit(&pm_rsvp_lock);
3108 	}
3109 
3110 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3111 	pm_rescan(dip);
3112 	return (DDI_SUCCESS);
3113 }
3114 
3115 /*
3116  * This function is called at startup time to notify pm of the existence
3117  * of any platform power managers for this platform.  As a result of
3118  * this registration, each function provided will be called each time
3119  * a device node is attached, until one returns true; a function must
3120  * claim the device node (by returning non-zero) if it wants to be
3121  * involved in the node's power management.  If it does claim the node,
3122  * then it will subsequently be notified of attach and detach events.
3123  *
3124  */
3125 
3126 int
3127 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3128 {
3129 	PMD_FUNC(pmf, "register_ppm")
3130 	struct ppm_callbacks *ppmcp;
3131 	pm_component_t *cp;
3132 	int i, pwr, result, circ;
3133 	power_req_t power_req;
3134 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3135 	void pm_ppm_claim(dev_info_t *);
3136 
3137 	mutex_enter(&ppm_lock);
3138 	ppmcp = ppm_callbacks;
3139 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3140 		if (ppmcp->ppmc_func == NULL) {
3141 			ppmcp->ppmc_func = func;
3142 			ppmcp->ppmc_dip = dip;
3143 			break;
3144 		}
3145 	}
3146 	mutex_exit(&ppm_lock);
3147 
3148 	if (i >= MAX_PPM_HANDLERS)
3149 		return (DDI_FAILURE);
3150 	while ((dip = ddi_get_parent(dip)) != NULL) {
3151 		if (PM_GET_PM_INFO(dip) == NULL)
3152 			continue;
3153 		pm_ppm_claim(dip);
3154 		if (pm_ppm_claimed(dip)) {
3155 			/*
3156 			 * Tell ppm about this.
3157 			 */
3158 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3159 			p->old_level = PM_LEVEL_UNKNOWN;
3160 			p->who = dip;
3161 			PM_LOCK_POWER(dip, &circ);
3162 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3163 				cp = PM_CP(dip, i);
3164 				pwr = cp->pmc_cur_pwr;
3165 				if (pwr != PM_LEVEL_UNKNOWN) {
3166 					p->cmpt = i;
3167 					p->new_level = cur_power(cp);
3168 					p->old_level = PM_LEVEL_UNKNOWN;
3169 					if (pm_ctlops(PPM(dip), dip,
3170 					    DDI_CTLOPS_POWER, &power_req,
3171 					    &result) == DDI_FAILURE) {
3172 						PMD(PMD_FAIL, ("%s: pc "
3173 						    "%s@%s(%s#%d) to %d "
3174 						    "fails\n", pmf,
3175 						    PM_DEVICE(dip), pwr))
3176 					}
3177 				}
3178 			}
3179 			PM_UNLOCK_POWER(dip, circ);
3180 		}
3181 	}
3182 	return (DDI_SUCCESS);
3183 }
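
/*
 * Illustrative sketch (not part of the original source): a platform power
 * manager registers a claim function at startup.  The driver-name match
 * shown here is a hypothetical claim policy, as is the "xxfb" name.
 */
#if 0	/* example only, never compiled */
static int
xxppm_claim(dev_info_t *dip)
{
	/* claim only nodes bound to the hypothetical "xxfb" driver */
	return (strcmp(ddi_driver_name(dip), "xxfb") == 0);
}

/* then, from the ppm driver's attach(9E): */
/* (void) pm_register_ppm(xxppm_claim, dip); */
#endif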
3184 
3185 /*
3186  * Call the ppms that have registered and adjust the devinfo struct as
3187  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3188  * by each ppm are assumed to be disjoint.
3189  */
3190 void
3191 pm_ppm_claim(dev_info_t *dip)
3192 {
3193 	struct ppm_callbacks *ppmcp;
3194 
3195 	if (PPM(dip)) {
3196 		return;
3197 	}
3198 	mutex_enter(&ppm_lock);
3199 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3200 		if ((*ppmcp->ppmc_func)(dip)) {
3201 			DEVI(dip)->devi_pm_ppm =
3202 			    (struct dev_info *)ppmcp->ppmc_dip;
3203 			mutex_exit(&ppm_lock);
3204 			return;
3205 		}
3206 	}
3207 	mutex_exit(&ppm_lock);
3208 }
3209 
3210 /*
3211  * Node is being detached so stop autopm until we see if it succeeds, in which
3212  * case pm_stop will be called.  For backwards compatible devices we bring the
3213  * device up to full power on the assumption the detach will succeed.
3214  */
3215 void
3216 pm_detaching(dev_info_t *dip)
3217 {
3218 	PMD_FUNC(pmf, "detaching")
3219 	pm_info_t *info = PM_GET_PM_INFO(dip);
3220 	int iscons;
3221 
3222 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3223 	    PM_NUMCMPTS(dip)))
3224 	if (info == NULL)
3225 		return;
3226 	ASSERT(DEVI_IS_DETACHING(dip));
3227 	PM_LOCK_DIP(dip);
3228 	info->pmi_dev_pm_state |= PM_DETACHING;
3229 	PM_UNLOCK_DIP(dip);
3230 	if (!PM_ISBC(dip))
3231 		pm_scan_stop(dip);
3232 
3233 	/*
3234 	 * console and old-style devices get brought up when detaching.
3235 	 */
3236 	iscons = PM_IS_CFB(dip);
3237 	if (iscons || PM_ISBC(dip)) {
3238 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3239 		if (iscons) {
3240 			mutex_enter(&pm_cfb_lock);
3241 			while (cfb_inuse) {
3242 				mutex_exit(&pm_cfb_lock);
3243 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3244 				delay(1);
3245 				mutex_enter(&pm_cfb_lock);
3246 			}
3247 			ASSERT(cfb_dip_detaching == NULL);
3248 			ASSERT(cfb_dip);
3249 			cfb_dip_detaching = cfb_dip;	/* in case detach fails */
3250 			cfb_dip = NULL;
3251 			mutex_exit(&pm_cfb_lock);
3252 		}
3253 	}
3254 }
3255 
3256 /*
3257  * Node failed to detach.  If it used to be autopm'd, make it so again.
3258  */
3259 void
3260 pm_detach_failed(dev_info_t *dip)
3261 {
3262 	PMD_FUNC(pmf, "detach_failed")
3263 	pm_info_t *info = PM_GET_PM_INFO(dip);
3264 	int pm_all_at_normal(dev_info_t *);
3265 
3266 	if (info == NULL)
3267 		return;
3268 	ASSERT(DEVI_IS_DETACHING(dip));
3269 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3270 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3271 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3272 			/* Make sure the operation is still needed */
3273 			if (!pm_all_at_normal(dip)) {
3274 				if (pm_all_to_normal(dip,
3275 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3276 					PMD(PMD_ERROR, ("%s: could not bring "
3277 					    "%s@%s(%s#%d) to normal\n", pmf,
3278 					    PM_DEVICE(dip)))
3279 				}
3280 			}
3281 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3282 		}
3283 	}
3284 	if (!PM_ISBC(dip)) {
3285 		mutex_enter(&pm_scan_lock);
3286 		if (PM_SCANABLE(dip))
3287 			pm_scan_init(dip);
3288 		mutex_exit(&pm_scan_lock);
3289 		pm_rescan(dip);
3290 	}
3291 }
3292 
3293 /* generic Backwards Compatible component */
3294 static char *bc_names[] = {"off", "on"};
3295 
3296 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3297 
3298 static void
3299 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3300 {
3301 	pm_comp_t *pmc;
3302 	pmc = &cp->pmc_comp;
3303 	pmc->pmc_numlevels = 2;
3304 	pmc->pmc_lvals[0] = 0;
3305 	pmc->pmc_lvals[1] = norm;
3306 	e_pm_set_cur_pwr(dip, cp, norm);
3307 }
3308 
3309 static void
3310 e_pm_default_components(dev_info_t *dip, int cmpts)
3311 {
3312 	int i;
3313 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3314 
3316 	for (i = 0; i < cmpts; i++, p++) {
3317 		p->pmc_comp = bc_comp;	/* struct assignment */
3318 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3319 		    KM_SLEEP);
3320 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3321 		    KM_SLEEP);
3322 		p->pmc_comp.pmc_numlevels = 2;
3323 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3324 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3325 	}
3326 }
3327 
3328 /*
3329  * Called from functions that require components to exist already, to allow
3330  * for their creation by parsing the pm-components property.
3331  * The device will not be power managed as a result of this call.
3332  * No locking is needed because we're single-threaded by the ndi_devi_enter
3333  * done while attaching, and the device isn't visible until after it has
3334  * attached.
3335  */
3336 int
3337 pm_premanage(dev_info_t *dip, int style)
3338 {
3339 	PMD_FUNC(pmf, "premanage")
3340 	pm_comp_t	*pcp, *compp;
3341 	int		cmpts, i, norm, error;
3342 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3343 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3344 
3345 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3346 	/*
3347 	 * If this dip has already been processed, don't mess with it
3348 	 */
3349 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3350 		return (DDI_SUCCESS);
3351 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3352 		return (DDI_FAILURE);
3353 	}
3354 	/*
3355 	 * Look up pm-components property and create components accordingly
3356 	 * If that fails, fall back to backwards compatibility
3357 	 */
3358 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3359 		/*
3360 		 * If error is set, the property existed but was not well formed
3361 		 */
3362 		if (error || (style == PM_STYLE_NEW)) {
3363 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3364 			return (DDI_FAILURE);
3365 		}
3366 		/*
3367 		 * If they don't have the pm-components property, then we
3368 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3369 		 * behavior.  The driver must have called pm_create_components,
3370 		 * and we need to flesh out dummy components.
3371 		 */
3372 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3373 			/*
3374 			 * Not really failure, but we don't want the
3375 			 * caller to treat it as success
3376 			 */
3377 			return (DDI_FAILURE);
3378 		}
3379 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3380 		e_pm_default_components(dip, cmpts);
3381 		for (i = 0; i < cmpts; i++) {
3382 			/*
3383 			 * If normal power is not set yet, we don't really know
3384 			 * what *ANY* of the power values are.  If normal
3385 			 * power is set, then we assume for this backwards
3386 			 * compatible case that the values are 0, normal power.
3387 			 */
3388 			norm = pm_get_normal_power(dip, i);
3389 			if (norm == (uint_t)-1) {
3390 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3391 				    PM_DEVICE(dip), i))
3392 				return (DDI_FAILURE);
3393 			}
3394 			/*
3395 			 * Components of BC devices start at their normal power,
3396 			 * so count them to be not at their lowest power.
3397 			 */
3398 			PM_INCR_NOTLOWEST(dip);
3399 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3400 		}
3401 	} else {
3402 		/*
3403 		 * e_pm_create_components was called from pm_autoconfig(); it
3404 		 * creates components with no descriptions (or known levels).
3405 		 */
3406 		cmpts = PM_NUMCMPTS(dip);
3407 		ASSERT(cmpts != 0);
3408 		pcp = compp;
3409 		p = DEVI(dip)->devi_pm_components;
3410 		for (i = 0; i < cmpts; i++, p++) {
3411 			p->pmc_comp = *pcp++;   /* struct assignment */
3412 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3413 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3414 		}
3415 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3416 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3417 			    PMC_CPU_THRESH);
3418 		else
3419 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3420 			    PMC_DEF_THRESH);
3421 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3422 	}
3423 	return (DDI_SUCCESS);
3424 }
3425 
3426 /*
3427  * Called during or after the device's attach to let us know it is ready
3428  * to play autopm.  Look up the pm model and manage the device accordingly.
3429  * Returns a system call errno value.
3430  * If DDI_ATTACH and DDI_DETACH were in the same namespace, this would be
3431  * a little cleaner.
3432  *
3433  * Called with the dip lock held; returns with the dip lock unheld.
3434  */
3435 
3436 int
3437 e_pm_manage(dev_info_t *dip, int style)
3438 {
3439 	PMD_FUNC(pmf, "e_manage")
3440 	pm_info_t	*info;
3441 	dev_info_t	*pdip = ddi_get_parent(dip);
3442 	int	pm_thresh_specd(dev_info_t *);
3443 	int	count;
3444 	char	*pathbuf;
3445 
3446 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3447 		return (DDI_FAILURE);
3448 	}
3449 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3450 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3451 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3452 
3453 	/*
3454 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3455 	 * out at their normal power, so they are "up"; others start out
3456 	 * unknown, which is effectively "up".  Parents which want notification
3457 	 * always get a kidsupcnt of 0.
3458 	 */
3459 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3460 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3461 		e_pm_hold_rele_power(pdip, count);
3462 
3463 	pm_set_pm_info(dip, info);
3464 	/*
3465 	 * Apply any recorded thresholds
3466 	 */
3467 	(void) pm_thresh_specd(dip);
3468 
3469 	/*
3470 	 * Do dependency processing.
3471 	 */
3472 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3473 	(void) ddi_pathname(dip, pathbuf);
3474 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3475 	    PM_DEP_NOWAIT, NULL, 0);
3476 	kmem_free(pathbuf, MAXPATHLEN);
3477 
3478 	if (!PM_ISBC(dip)) {
3479 		mutex_enter(&pm_scan_lock);
3480 		if (PM_SCANABLE(dip)) {
3481 			pm_scan_init(dip);
3482 			mutex_exit(&pm_scan_lock);
3483 			pm_rescan(dip);
3484 		} else {
3485 			mutex_exit(&pm_scan_lock);
3486 		}
3487 	}
3488 	return (0);
3489 }
3490 
3491 /*
3492  * This is the obsolete exported interface for a driver to find out its
3493  * "normal" (max) power.
3494  * We only get components destroyed while no power management is
3495  * going on (and the device is detached), so we don't need a mutex here
3496  */
3497 int
3498 pm_get_normal_power(dev_info_t *dip, int comp)
3499 {
3500 
3501 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3502 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3503 	}
3504 	return (DDI_FAILURE);
3505 }
3506 
3507 /*
3508  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3509  */
3510 int
3511 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3512 {
3513 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3514 		*levelp = PM_CURPOWER(dip, comp);
3515 		return (DDI_SUCCESS);
3516 	}
3517 	return (DDI_FAILURE);
3518 }
3519 
3520 /*
3521  * Returns current threshold of indicated component
3522  */
3523 static int
3524 cur_threshold(dev_info_t *dip, int comp)
3525 {
3526 	pm_component_t *cp = PM_CP(dip, comp);
3527 	int pwr;
3528 
3529 	if (PM_ISBC(dip)) {
3530 		/*
3531 		 * backwards compatible nodes only have one threshold
3532 		 */
3533 		return (cp->pmc_comp.pmc_thresh[1]);
3534 	}
3535 	pwr = cp->pmc_cur_pwr;
3536 	if (pwr == PM_LEVEL_UNKNOWN) {
3537 		int thresh;
3538 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3539 			thresh = pm_default_nexus_threshold;
3540 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3541 			thresh = pm_cpu_idle_threshold;
3542 		else
3543 			thresh = pm_system_idle_threshold;
3544 		return (thresh);
3545 	}
3546 	ASSERT(cp->pmc_comp.pmc_thresh);
3547 	return (cp->pmc_comp.pmc_thresh[pwr]);
3548 }
3549 
3550 /*
3551  * Compute next lower component power level given power index.
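 *
 * For example (illustrative levels): with pmc_lvals[] = { 0, 15, 23 } and
 * a current power index of 2, the next lower level is pmc_lvals[1] = 15;
 * an index of PM_LEVEL_UNKNOWN yields the lowest level, pmc_lvals[0].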
3552  */
3553 static int
3554 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3555 {
3556 	int nxt_pwr;
3557 
3558 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3559 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3560 	} else {
3561 		pwrndx--;
3562 		ASSERT(pwrndx >= 0);
3563 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3564 	}
3565 	return (nxt_pwr);
3566 }
3567 
3568 /*
3569  * Bring all components of device to normal power
3570  */
3571 int
3572 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3573 {
3574 	PMD_FUNC(pmf, "all_to_normal")
3575 	int		*normal;
3576 	int		i, ncomps, result;
3577 	size_t		size;
3578 	int		changefailed = 0;
3579 
3580 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3581 	ASSERT(PM_GET_PM_INFO(dip));
3582 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3583 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3584 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3585 		return (DDI_FAILURE);
3586 	}
3587 	ncomps = PM_NUMCMPTS(dip);
3588 	for (i = 0; i < ncomps; i++) {
3589 		if (pm_set_power(dip, i, normal[i],
3590 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3591 			changefailed++;
3592 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3593 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3594 			    PM_DEVICE(dip), i, normal[i], result))
3595 		}
3596 	}
3597 	kmem_free(normal, size);
3598 	if (changefailed) {
3599 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3600 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3601 		return (DDI_FAILURE);
3602 	}
3603 	return (DDI_SUCCESS);
3604 }
3605 
3606 /*
3607  * Returns true if all components of device are at normal power
3608  */
3609 int
3610 pm_all_at_normal(dev_info_t *dip)
3611 {
3612 	PMD_FUNC(pmf, "all_at_normal")
3613 	int		*normal;
3614 	int		i;
3615 	size_t		size;
3616 
3617 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3618 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3619 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3620 		return (DDI_FAILURE);
3621 	}
3622 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3623 		int current = PM_CURPOWER(dip, i);
3624 		if (normal[i] > current) {
3625 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3626 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3627 			    normal[i], current))
3628 			break;
3629 		}
3630 	}
3631 	kmem_free(normal, size);
3632 	if (i != PM_NUMCMPTS(dip)) {
3633 		return (0);
3634 	}
3635 	return (1);
3636 }
3637 
3638 static void
3639 bring_wekeeps_up(char *keeper)
3640 {
3641 	PMD_FUNC(pmf, "bring_wekeeps_up")
3642 	int i;
3643 	pm_pdr_t *dp;
3644 	pm_info_t *wku_info;
3645 	char *kept_path;
3646 	dev_info_t *kept;
3647 	static void bring_pmdep_up(dev_info_t *, int);
3648 
3649 	if (panicstr) {
3650 		return;
3651 	}
3652 	/*
3653 	 * We process the request even if the keeper detaches because
3654 	 * detach processing expects this to increment kidsupcnt of kept.
3655 	 */
3656 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3657 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3658 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3659 			continue;
3660 		for (i = 0; i < dp->pdr_kept_count; i++) {
3661 			kept_path = dp->pdr_kept_paths[i];
3662 			if (kept_path == NULL)
3663 				continue;
3664 			ASSERT(kept_path[0] != '\0');
3665 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3666 				continue;
3667 			wku_info = PM_GET_PM_INFO(kept);
3668 			if (wku_info == NULL) {
3669 				if (kept)
3670 					ddi_release_devi(kept);
3671 				continue;
3672 			}
3673 			/*
3674 			 * Don't mess with it if it is being detached, it isn't
3675 			 * safe to call its power entry point
3676 			 */
3677 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3678 				if (kept)
3679 					ddi_release_devi(kept);
3680 				continue;
3681 			}
3682 			bring_pmdep_up(kept, 1);
3683 			ddi_release_devi(kept);
3684 		}
3685 	}
3686 }
3687 
3688 /*
3689  * Bring up the 'kept' device passed as argument
3690  */
3691 static void
3692 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3693 {
3694 	PMD_FUNC(pmf, "bring_pmdep_up")
3695 	int is_all_at_normal = 0;
3696 
3697 	/*
3698 	 * If the kept device has been unmanaged, do nothing.
3699 	 */
3700 	if (!PM_GET_PM_INFO(kept_dip))
3701 		return;
3702 
3703 	/* Just ignore DIRECT PM devices until they are released. */
3704 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3705 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3706 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3707 		    "controlling process did something else\n", pmf,
3708 		    PM_DEVICE(kept_dip)))
3709 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3710 		return;
3711 	}
3712 	/* if we got here the keeper had a transition from OFF->ON */
3713 	if (hold)
3714 		pm_hold_power(kept_dip);
3715 
3716 	if (!is_all_at_normal)
3717 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3718 }
3719 
3720 /*
3721  * A bunch of stuff that belongs only to the next routine (or two)
3722  */
3723 
3724 static const char namestr[] = "NAME=";
3725 static const int nameln = sizeof (namestr) - 1;
3726 static const char pmcompstr[] = "pm-components";
3727 
3728 struct pm_comp_pkg {
3729 	pm_comp_t		*comp;
3730 	struct pm_comp_pkg	*next;
3731 };
3732 
3733 #define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3734 
3735 #define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
3736 			((ch) >= 'A' && (ch) <= 'F'))
3737 
3738 /*
3739  * Rather than duplicate this code ...
3740  * (this code excerpted from the function that follows it)
3741  */
3742 #define	FINISH_COMP { \
3743 	ASSERT(compp); \
3744 	compp->pmc_lnames_sz = size; \
3745 	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
3746 	compp->pmc_numlevels = level; \
3747 	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
3748 	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3749 	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3750 	/* copy string out of prop array into buffer */ \
3751 	for (j = 0; j < level; j++) { \
3752 		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
3753 		compp->pmc_lvals[j] = lvals[j]; \
3754 		(void) strcpy(tp, lnames[j]); \
3755 		compp->pmc_lnames[j] = tp; \
3756 		tp += lszs[j]; \
3757 	} \
3758 	ASSERT(tp > compp->pmc_lname_buf && tp <= \
3759 	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
3760 	}
3761 
3762 /*
3763  * Create (empty) component data structures.
3764  */
3765 static void
3766 e_pm_create_components(dev_info_t *dip, int num_components)
3767 {
3768 	struct pm_component *compp, *ocompp;
3769 	int i, size = 0;
3770 
3771 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3772 	ASSERT(!DEVI(dip)->devi_pm_components);
3773 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3774 	size = sizeof (struct pm_component) * num_components;
3775 
3776 	compp = kmem_zalloc(size, KM_SLEEP);
3777 	ocompp = compp;
3778 	DEVI(dip)->devi_pm_comp_size = size;
3779 	DEVI(dip)->devi_pm_num_components = num_components;
3780 	PM_LOCK_BUSY(dip);
3781 	for (i = 0; i < num_components;  i++) {
3782 		compp->pmc_timestamp = gethrestime_sec();
3783 		compp->pmc_norm_pwr = (uint_t)-1;
3784 		compp++;
3785 	}
3786 	PM_UNLOCK_BUSY(dip);
3787 	DEVI(dip)->devi_pm_components = ocompp;
3788 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3789 }
3790 
3791 /*
3792  * Parse hex or decimal value from char string
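 *
 * For example (illustrative input): given "0x1f=on", *valp is set to 31
 * and the returned pointer addresses the '='; given "255=on", *valp is
 * set to 255.  Returns NULL if the string does not begin with a digit.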
3793  */
3794 static char *
3795 pm_parsenum(char *cp, int *valp)
3796 {
3797 	int ch, offset;
3798 	char numbuf[256];
3799 	char *np = numbuf;
3800 	int value = 0;
3801 
3802 	ch = *cp++;
3803 	if (isdigit(ch)) {
3804 		if (ch == '0') {
3805 			if ((ch = *cp++) == 'x' || ch == 'X') {
3806 				ch = *cp++;
3807 				while (isxdigit(ch)) {
3808 					*np++ = (char)ch;
3809 					ch = *cp++;
3810 				}
3811 				*np = 0;
3812 				cp--;
3813 				goto hexval;
3814 			} else {
3815 				goto digit;
3816 			}
3817 		} else {
3818 digit:
3819 			while (isdigit(ch)) {
3820 				*np++ = (char)ch;
3821 				ch = *cp++;
3822 			}
3823 			*np = 0;
3824 			cp--;
3825 			goto decval;
3826 		}
3827 	} else
3828 		return (NULL);
3829 
3830 hexval:
3831 	for (np = numbuf; *np; np++) {
3832 		if (*np >= 'a' && *np <= 'f')
3833 			offset = 'a' - 10;
3834 		else if (*np >= 'A' && *np <= 'F')
3835 			offset = 'A' - 10;
3836 		else if (*np >= '0' && *np <= '9')
3837 			offset = '0';
3838 		value *= 16;
3839 		value += *np - offset;
3840 	}
3841 	*valp = value;
3842 	return (cp);
3843 
3844 decval:
3845 	offset = '0';
3846 	for (np = numbuf; *np; np++) {
3847 		value *= 10;
3848 		value += *np - offset;
3849 	}
3850 	*valp = value;
3851 	return (cp);
3852 }
3853 
3854 /*
3855  * Set max (previously documented as "normal") power.
3856  */
3857 static void
3858 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3859 {
3860 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3861 }
3862 
3863 /*
3864  * Internal routine for destroying components
3865  * It is called even when there might not be any, so it must be forgiving.
3866  */
3867 static void
3868 e_pm_destroy_components(dev_info_t *dip)
3869 {
3870 	int i;
3871 	struct pm_component *cp;
3872 
3873 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3874 	if (PM_NUMCMPTS(dip) == 0)
3875 		return;
3876 	cp = DEVI(dip)->devi_pm_components;
3877 	ASSERT(cp);
3878 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3879 		int nlevels = cp->pmc_comp.pmc_numlevels;
3880 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3881 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3882 		/*
3883 		 * For BC nodes, the rest is static in bc_comp, so skip it
3884 		 */
3885 		if (PM_ISBC(dip))
3886 			continue;
3887 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3888 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3889 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3890 		    cp->pmc_comp.pmc_lnames_sz);
3891 	}
3892 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3893 	DEVI(dip)->devi_pm_components = NULL;
3894 	DEVI(dip)->devi_pm_num_components = 0;
3895 	DEVI(dip)->devi_pm_flags &=
3896 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3897 }
3898 
3899 /*
3900  * Read the pm-components property (if there is one) and use it to set up
3901  * components.  Returns a pointer to an array of component structures if
3902  * pm-components found and successfully parsed, else returns NULL.
3903  * Sets error return *errp to true to indicate a failure (as opposed to no
3904  * property being present).
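 *
 * As an illustrative sketch (names and values are not from this source),
 * a driver exporting
 *
 *	pm-components = "NAME=disk", "0=off", "1=on";
 *
 * gets back one component named "disk" with two power levels, 0 ("off")
 * and 1 ("on").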
3905  */
3906 pm_comp_t *
3907 pm_autoconfig(dev_info_t *dip, int *errp)
3908 {
3909 	PMD_FUNC(pmf, "autoconfig")
3910 	uint_t nelems;
3911 	char **pp;
3912 	pm_comp_t *compp = NULL;
3913 	int i, j, level, components = 0;
3914 	size_t size = 0;
3915 	struct pm_comp_pkg *p, *ptail;
3916 	struct pm_comp_pkg *phead = NULL;
3917 	int *lvals = NULL;
3918 	int *lszs = NULL;
3919 	int *np = NULL;
3920 	int npi = 0;
3921 	char **lnames = NULL;
3922 	char *cp, *tp;
3923 	pm_comp_t *ret = NULL;
3924 
3925 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3926 	*errp = 0;	/* assume success */
3927 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3928 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3929 		return (NULL);
3930 	}
3931 
3932 	if (nelems < 3) {	/* need at least one name and two levels */
3933 		goto errout;
3934 	}
3935 
3936 	/*
3937 	 * pm_create_components is no longer allowed
3938 	 */
3939 	if (PM_NUMCMPTS(dip) != 0) {
3940 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3941 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3942 		goto errout;
3943 	}
3944 
3945 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3946 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3947 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
3948 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3949 
3950 	level = 0;
3951 	phead = NULL;
3952 	for (i = 0; i < nelems; i++) {
3953 		cp = pp[i];
3954 		if (!isdigit(*cp)) {	/*  must be name */
3955 			if (strncmp(cp, namestr, nameln) != 0) {
3956 				goto errout;
3957 			}
3958 			if (i != 0) {
3959 				if (level == 0) {	/* no level spec'd */
3960 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
3961 					    pmf))
3962 					goto errout;
3963 				}
3964 				np[npi++] = lvals[level - 1];
3965 				/* finish up previous component levels */
3966 				FINISH_COMP;
3967 			}
3968 			cp += nameln;
3969 			if (!*cp) {
3970 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
3971 				goto errout;
3972 			}
3973 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
3974 			if (phead == NULL) {
3975 				phead = ptail = p;
3976 			} else {
3977 				ptail->next = p;
3978 				ptail = p;
3979 			}
3980 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
3981 			    KM_SLEEP);
3982 			compp->pmc_name_sz = strlen(cp) + 1;
3983 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
3984 			    KM_SLEEP);
3985 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
3986 			components++;
3987 			level = 0;
3988 		} else {	/* better be power level <num>=<name> */
3989 #ifdef DEBUG
3990 			tp = cp;
3991 #endif
3992 			if (i == 0 ||
3993 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
3994 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
3995 				goto errout;
3996 			}
3997 #ifdef DEBUG
3998 			tp = cp;
3999 #endif
4000 			if (*cp++ != '=' || !*cp) {
4001 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4002 				goto errout;
4003 			}
4004 
4005 			lszs[level] = strlen(cp) + 1;
4006 			size += lszs[level];
4007 			lnames[level] = cp;	/* points into prop string */
4008 			level++;
4009 		}
4010 	}
4011 	if (level == 0) {	/* ended with a name */
4012 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4013 		goto errout;
4014 	}
4015 	np[npi++] = lvals[level - 1];
4016 	FINISH_COMP;
4017 
4019 	/*
4020 	 * Now we have a list of components--we have to instead return an
4021 	 * array of them, but we can just copy the top level and leave
4022 	 * the rest as is.
4023 	 */
4024 	e_pm_create_components(dip, components);
4025 	for (i = 0; i < components; i++)
4026 		e_pm_set_max_power(dip, i, np[i]);
4027 
4028 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4029 	for (i = 0, p = phead; i < components; i++) {
4030 		ASSERT(p);
4031 		/*
4032 		 * Now sanity-check values:  levels must be monotonically
4033 		 * increasing
4034 		 */
4035 		if (p->comp->pmc_numlevels < 2) {
4036 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4037 			    "levels\n", pmf,
4038 			    p->comp->pmc_name, PM_DEVICE(dip),
4039 			    p->comp->pmc_numlevels))
4040 			goto errout;
4041 		}
4042 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4043 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4044 			    (p->comp->pmc_lvals[j] <=
4045 			    p->comp->pmc_lvals[j - 1]))) {
4046 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4047 				    "not mono. incr, %d follows %d\n", pmf,
4048 				    p->comp->pmc_name, PM_DEVICE(dip),
4049 				    p->comp->pmc_lvals[j],
4050 				    p->comp->pmc_lvals[j - 1]))
4051 				goto errout;
4052 			}
4053 		}
4054 		ret[i] = *p->comp;	/* struct assignment */
4055 		for (j = 0; j < i; j++) {
4056 			/*
4057 			 * Test for unique component names
4058 			 */
4059 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4060 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4061 				    "unique\n", pmf, ret[j].pmc_name,
4062 				    PM_DEVICE(dip)))
4063 				goto errout;
4064 			}
4065 		}
4066 		ptail = p;
4067 		p = p->next;
4068 		phead = p;	/* errout depends on phead making sense */
4069 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4070 		kmem_free(ptail, sizeof (*ptail));
4071 	}
4072 out:
4073 	ddi_prop_free(pp);
4074 	if (lvals)
4075 		kmem_free(lvals, nelems * sizeof (int));
4076 	if (lszs)
4077 		kmem_free(lszs, nelems * sizeof (int));
4078 	if (lnames)
4079 		kmem_free(lnames, nelems * sizeof (char *));
4080 	if (np)
4081 		kmem_free(np, nelems * sizeof (int));
4082 	return (ret);
4083 
4084 errout:
4085 	e_pm_destroy_components(dip);
4086 	*errp = 1;	/* signal failure */
4087 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4088 	for (i = 0; i < (int)nelems - 1; i++)
4089 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4090 	if (nelems != 0)
4091 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4092 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4093 	for (p = phead; p; ) {
4094 		pm_comp_t *pp;
4095 		int n;
4096 
4097 		ptail = p;
4098 		/*
4099 		 * Free component data structures
4100 		 */
4101 		pp = p->comp;
4102 		n = pp->pmc_numlevels;
4103 		if (pp->pmc_name_sz) {
4104 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4105 		}
4106 		if (pp->pmc_lnames_sz) {
4107 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4108 		}
4109 		if (pp->pmc_lnames) {
4110 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4111 		}
4112 		if (pp->pmc_thresh) {
4113 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4114 		}
4115 		if (pp->pmc_lvals) {
4116 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4117 		}
4118 		p = ptail->next;
4119 		kmem_free(ptail, sizeof (*ptail));
4120 	}
4121 	if (ret != NULL)
4122 		kmem_free(ret, components * sizeof (pm_comp_t));
4123 	ret = NULL;
4124 	goto out;
4125 }
4126 
4127 /*
4128  * Set threshold values for a device's components by dividing the target
4129  * threshold (base) by the number of transitions, assigning each transition
4130  * that threshold.  This will get the entire device down in the target time
4131  * if all components are idle, even if there are dependencies among them.
4132  *
4133  * Devices may well get powered all the way down before the target time, but
4134  * at least the EPA will be happy.
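 *
 * For example (illustrative numbers): a device with one 3-level and one
 * 2-level component has 3 transitions, so a base of 300 seconds gives a
 * target of (300 * 95) / 100 = 285, and each transition gets a threshold
 * of 285 / 3 = 95 seconds, with no remainder left to distribute.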
4135  */
4136 void
4137 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4138 {
4139 	PMD_FUNC(pmf, "set_device_threshold")
4140 	int target_threshold = (base * 95) / 100;
4141 	int level, comp;		/* loop counters */
4142 	int transitions = 0;
4143 	int ncomp = PM_NUMCMPTS(dip);
4144 	int thresh;
4145 	int remainder;
4146 	pm_comp_t *pmc;
4147 	int i, circ;
4148 
4149 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4150 	PM_LOCK_DIP(dip);
4151 	/*
4152 	 * First we handle the easy one.  If we're setting the default
4153 	 * threshold for a node with children, then we set it to the
4154 	 * default nexus threshold (currently 0) and mark it as default
4155 	 * nexus threshold instead
4156 	 */
4157 	if (PM_IS_NEXUS(dip)) {
4158 		if (flag == PMC_DEF_THRESH) {
4159 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4160 			    PM_DEVICE(dip)))
4161 			thresh = pm_default_nexus_threshold;
4162 			for (comp = 0; comp < ncomp; comp++) {
4163 				pmc = &PM_CP(dip, comp)->pmc_comp;
4164 				for (level = 1; level < pmc->pmc_numlevels;
4165 				    level++) {
4166 					pmc->pmc_thresh[level] = thresh;
4167 				}
4168 			}
4169 			DEVI(dip)->devi_pm_dev_thresh =
4170 			    pm_default_nexus_threshold;
4171 			/*
4172 			 * If the nexus node is being reconfigured back to
4173 			 * the default threshold, adjust the notlowest count.
4174 			 */
4175 			if (DEVI(dip)->devi_pm_flags &
4176 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4177 				PM_LOCK_POWER(dip, &circ);
4178 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4179 					if (PM_CURPOWER(dip, i) == 0)
4180 						continue;
4181 					mutex_enter(&pm_compcnt_lock);
4182 					ASSERT(pm_comps_notlowest);
4183 					pm_comps_notlowest--;
4184 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4185 					    "notlowest to %d\n", pmf,
4186 					    PM_DEVICE(dip), pm_comps_notlowest))
4187 					if (pm_comps_notlowest == 0)
4188 						pm_ppm_notify_all_lowest(dip,
4189 						    PM_ALL_LOWEST);
4190 					mutex_exit(&pm_compcnt_lock);
4191 				}
4192 				PM_UNLOCK_POWER(dip, circ);
4193 			}
4194 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4195 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4196 			PM_UNLOCK_DIP(dip);
4197 			return;
4198 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4199 			/*
4200 			 * If the nexus node is being configured for a
4201 			 * non-default threshold, include that node in
4202 			 * the notlowest accounting.
4203 			 */
4204 			PM_LOCK_POWER(dip, &circ);
4205 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4206 				if (PM_CURPOWER(dip, i) == 0)
4207 					continue;
4208 				mutex_enter(&pm_compcnt_lock);
4209 				if (pm_comps_notlowest == 0)
4210 					pm_ppm_notify_all_lowest(dip,
4211 					    PM_NOT_ALL_LOWEST);
4212 				pm_comps_notlowest++;
4213 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4214 				    "notlowest to %d\n", pmf,
4215 				    PM_DEVICE(dip), pm_comps_notlowest))
4216 				mutex_exit(&pm_compcnt_lock);
4217 			}
4218 			PM_UNLOCK_POWER(dip, circ);
4219 		}
4220 	}
4221 	/*
4222 	 * Compute the total number of transitions for all components
4223 	 * of the device.  Distribute the threshold evenly over them
4224 	 */
4225 	for (comp = 0; comp < ncomp; comp++) {
4226 		pmc = &PM_CP(dip, comp)->pmc_comp;
4227 		ASSERT(pmc->pmc_numlevels > 1);
4228 		transitions += pmc->pmc_numlevels - 1;
4229 	}
4230 	ASSERT(transitions);
4231 	thresh = target_threshold / transitions;
4232 
4233 	for (comp = 0; comp < ncomp; comp++) {
4234 		pmc = &PM_CP(dip, comp)->pmc_comp;
4235 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4236 			pmc->pmc_thresh[level] = thresh;
4237 		}
4238 	}
4239 
4240 #ifdef DEBUG
4241 	for (comp = 0; comp < ncomp; comp++) {
4242 		pmc = &PM_CP(dip, comp)->pmc_comp;
4243 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4244 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4245 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4246 			    comp, level, pmc->pmc_thresh[level]))
4247 		}
4248 	}
4249 #endif
4250 	/*
4251 	 * Distribute any remainder till they are all gone
4252 	 */
4253 	remainder = target_threshold - thresh * transitions;
4254 	level = 1;
4255 #ifdef DEBUG
4256 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4257 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4258 	    transitions))
4259 #endif
4260 	while (remainder > 0) {
4261 		comp = 0;
4262 		while (remainder && (comp < ncomp)) {
4263 			pmc = &PM_CP(dip, comp)->pmc_comp;
4264 			if (level < pmc->pmc_numlevels) {
4265 				pmc->pmc_thresh[level] += 1;
4266 				remainder--;
4267 			}
4268 			comp++;
4269 		}
4270 		level++;
4271 	}
4272 #ifdef DEBUG
4273 	for (comp = 0; comp < ncomp; comp++) {
4274 		pmc = &PM_CP(dip, comp)->pmc_comp;
4275 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4276 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4277 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4278 			    comp, level, pmc->pmc_thresh[level]))
4279 		}
4280 	}
4281 #endif
4282 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4283 	DEVI(dip)->devi_pm_dev_thresh = base;
4284 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4285 	DEVI(dip)->devi_pm_flags |= flag;
4286 	PM_UNLOCK_DIP(dip);
4287 }
4288 
4289 /*
4290  * Called when there is no old-style platform power management driver
4291  */
4292 static int
4293 ddi_no_platform_power(power_req_t *req)
4294 {
4295 	_NOTE(ARGUNUSED(req))
4296 	return (DDI_FAILURE);
4297 }
4298 
4299 /*
4300  * This function calls the entry point supplied by the platform-specific
4301  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4302  * The use of a global for getting the function name from the platform-specific
4303  * pm driver is not ideal, but it is simple and efficient.
4304  * The previous property lookup was being done in the idle loop on swift
4305  * systems without pmc chips and hurt deskbench performance as well as
4306  * violating scheduler locking rules
4307  */
4308 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4309 
4310 /*
4311  * Old obsolete interface for a device to request a power change (but only
4312  * an increase in power)
4313  */
4314 int
4315 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4316 {
4317 	return (pm_raise_power(dip, cmpt, level));
4318 }
4319 
4320 /*
4321  * The old obsolete interface to platform power management.  Only used by
4322  * Gypsy platform and APM on X86.
4323  */
4324 int
4325 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4326 {
4327 	power_req_t	request;
4328 
4329 	request.request_type = PMR_SET_POWER;
4330 	request.req.set_power_req.who = dip;
4331 	request.req.set_power_req.cmpt = pm_cmpt;
4332 	request.req.set_power_req.level = pm_level;
4333 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4334 }
4335 
4336 /*
4337  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4338  * passed.  Returns true if subsequent processing could result in power being
4339  * removed from the device.  The arg is not currently used because it is
4340  * implicit in the operation of cpr/DR.
4341  */
4342 int
4343 ddi_removing_power(dev_info_t *dip)
4344 {
4345 	_NOTE(ARGUNUSED(dip))
4346 	return (pm_powering_down);
4347 }
4348 
4349 /*
4350  * Returns true if a device indicates that its parent handles suspend/resume
4351  * processing for it.
4352  */
4353 int
4354 e_ddi_parental_suspend_resume(dev_info_t *dip)
4355 {
4356 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4357 }
4358 
4359 /*
4360  * Called for devices which indicate that their parent does suspend/resume
4361  * handling for them
4362  */
4363 int
4364 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4365 {
4366 	power_req_t	request;
4367 	request.request_type = PMR_SUSPEND;
4368 	request.req.suspend_req.who = dip;
4369 	request.req.suspend_req.cmd = cmd;
4370 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4371 }
4372 
4373 /*
4374  * Called for devices which indicate that their parent does suspend/resume
4375  * handling for them
4376  */
4377 int
4378 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4379 {
4380 	power_req_t	request;
4381 	request.request_type = PMR_RESUME;
4382 	request.req.resume_req.who = dip;
4383 	request.req.resume_req.cmd = cmd;
4384 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4385 }
4386 
4387 /*
4388  * Old obsolete exported interface for drivers to create components.
4389  * This is now handled by exporting the pm-components property.
4390  */
4391 int
4392 pm_create_components(dev_info_t *dip, int num_components)
4393 {
4394 	PMD_FUNC(pmf, "pm_create_components")
4395 
4396 	if (num_components < 1)
4397 		return (DDI_FAILURE);
4398 
4399 	if (!DEVI_IS_ATTACHING(dip)) {
4400 		return (DDI_FAILURE);
4401 	}
4402 
4403 	/* don't need to lock dip because attach is single threaded */
4404 	if (DEVI(dip)->devi_pm_components) {
4405 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4406 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4407 		return (DDI_FAILURE);
4408 	}
4409 	e_pm_create_components(dip, num_components);
4410 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4411 	e_pm_default_components(dip, num_components);
4412 	return (DDI_SUCCESS);
4413 }
4414 
4415 /*
4416  * Obsolete interface previously called by drivers to destroy their components
4417  * at detach time.  This is now done automatically.  However, we need to keep
4418  * this for the old drivers.
4419  */
4420 void
4421 pm_destroy_components(dev_info_t *dip)
4422 {
4423 	PMD_FUNC(pmf, "pm_destroy_components")
4424 	dev_info_t *pdip = ddi_get_parent(dip);
4425 
4426 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4427 	    PM_DEVICE(dip)))
4428 	ASSERT(DEVI_IS_DETACHING(dip));
4429 #ifdef DEBUG
4430 	if (!PM_ISBC(dip))
4431 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4432 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4433 		    PM_ADDR(dip));
4434 #endif
4435 	/*
4436 	 * We ignore this unless this is an old-style driver, except for
4437 	 * printing the message above
4438 	 */
4439 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4440 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4441 		    PM_DEVICE(dip)))
4442 		return;
4443 	}
4444 	ASSERT(PM_GET_PM_INFO(dip));
4445 
4446 	/*
4447 	 * pm_unmanage will clear info pointer later, after dealing with
4448 	 * dependencies
4449 	 */
4450 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4451 	/*
4452 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4453 	 * Parents that get notification are not adjusted because their
4454 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4455 	 */
4456 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4457 		pm_rele_power(pdip);
4458 #ifdef DEBUG
4459 	else {
4460 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4461 		    pmf, PM_DEVICE(dip)))
4462 	}
4463 #endif
4464 	e_pm_destroy_components(dip);
4465 	/*
4466 	 * Forget we ever knew anything about the components of this device.
4467 	 */
4468 	DEVI(dip)->devi_pm_flags &=
4469 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4470 }
4471 
4472 /*
4473  * Exported interface for a driver to set a component busy.
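 *
 * A typical caller (an illustrative sketch, not code from this file)
 * brackets hardware activity on component 0 like this:
 *
 *	(void) pm_busy_component(dip, 0);
 *	(do the device I/O)
 *	(void) pm_idle_component(dip, 0);
 *
 * so that the scan thread cannot power the component down while the
 * device is busy.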
4474  */
4475 int
4476 pm_busy_component(dev_info_t *dip, int cmpt)
4477 {
4478 	struct pm_component *cp;
4479 
4480 	ASSERT(dip != NULL);
4481 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4482 		return (DDI_FAILURE);
4483 	PM_LOCK_BUSY(dip);
4484 	cp->pmc_busycount++;
4485 	cp->pmc_timestamp = 0;
4486 	PM_UNLOCK_BUSY(dip);
4487 	return (DDI_SUCCESS);
4488 }
4489 
4490 /*
4491  * Exported interface for a driver to set a component idle.
4492  */
4493 int
4494 pm_idle_component(dev_info_t *dip, int cmpt)
4495 {
4496 	PMD_FUNC(pmf, "pm_idle_component")
4497 	struct pm_component *cp;
4498 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4499 
4500 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4501 		return (DDI_FAILURE);
4502 
4503 	PM_LOCK_BUSY(dip);
4504 	if (cp->pmc_busycount) {
4505 		if (--(cp->pmc_busycount) == 0)
4506 			cp->pmc_timestamp = gethrestime_sec();
4507 	} else {
4508 		cp->pmc_timestamp = gethrestime_sec();
4509 	}
4510 
4511 	PM_UNLOCK_BUSY(dip);
4512 
4513 	/*
4514 	 * If the device becomes idle during idle-down, try to scan it down.
4515 	 */
4516 	if (scanp && PM_IS_PID(dip)) {
4517 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4518 		    PM_DEVICE(dip)))
4519 		pm_rescan(dip);
4520 		return (DDI_SUCCESS);
4521 	}
4522 
4523 	/*
4524 	 * handle scan not running with nexus threshold == 0
4525 	 */
4526 
4527 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4528 		pm_rescan(dip);
4529 	}
4530 
4531 	return (DDI_SUCCESS);
4532 }
4533 
4534 /*
4535  * This is the old, obsolete interface called by drivers to set their normal
4536  * power.  Thus we can't fix its behavior or return a value.
4537  * This functionality is replaced by the pm-components property.
4538  * We'll only get components destroyed while no power management is
4539  * going on (and the device is detached), so we don't need a mutex here.
4540  */
4541 void
4542 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4543 {
4544 	PMD_FUNC(pmf, "set_normal_power")
4545 #ifdef DEBUG
4546 	if (!PM_ISBC(dip))
4547 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4548 		    "(driver exporting pm-components property) ignored",
4549 		    PM_NAME(dip), PM_ADDR(dip));
4550 #endif
4551 	if (PM_ISBC(dip)) {
4552 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4553 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4554 		e_pm_set_max_power(dip, comp, level);
4555 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4556 	}
4557 }
4558 
4559 /*
4560  * Called on a successfully detached driver to free pm resources
4561  */
4562 static void
4563 pm_stop(dev_info_t *dip)
4564 {
4565 	PMD_FUNC(pmf, "stop")
4566 	dev_info_t *pdip = ddi_get_parent(dip);
4567 
4568 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4569 	/* stopping scan, destroy scan data structure */
4570 	if (!PM_ISBC(dip)) {
4571 		pm_scan_stop(dip);
4572 		pm_scan_fini(dip);
4573 	}
4574 
4575 	if (PM_GET_PM_INFO(dip) != NULL) {
4576 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4577 			/*
4578 			 * Old style driver may have called
4579 			 * pm_destroy_components already, but just in case ...
4580 			 */
4581 			e_pm_destroy_components(dip);
4582 		} else {
4583 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4584 			    pmf, PM_DEVICE(dip)))
4585 		}
4586 	} else {
4587 		if (PM_NUMCMPTS(dip))
4588 			e_pm_destroy_components(dip);
4589 		else {
4590 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4591 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4592 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4593 					pm_rele_power(pdip);
4594 				} else if (pdip &&
4595 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4596 					(void) mdi_power(pdip,
4597 					    MDI_PM_RELE_POWER,
4598 					    (void *)dip, NULL, 0);
4599 				}
4600 			}
4601 		}
4602 	}
4603 }
4604 
4605 /*
4606  * The node is the subject of a reparse pm props ioctl. Throw away the old
4607  * info and start over.
4608  */
4609 int
4610 e_new_pm_props(dev_info_t *dip)
4611 {
4612 	if (PM_GET_PM_INFO(dip) != NULL) {
4613 		pm_stop(dip);
4614 
4615 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4616 			return (DDI_FAILURE);
4617 		}
4618 	}
4619 	e_pm_props(dip);
4620 	return (DDI_SUCCESS);
4621 }
4622 
4623 /*
4624  * Device has been attached, so process its pm properties
4625  */
4626 void
4627 e_pm_props(dev_info_t *dip)
4628 {
4629 	char *pp;
4630 	int len;
4631 	int flags = 0;
4632 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4633 
4634 	/*
4635 	 * It doesn't matter if we do this more than once, we should always
4636 	 * get the same answers, and if not, then the last one in is the
4637 	 * best one.
4638 	 */
4639 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4640 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4641 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4642 			flags = PMC_NEEDS_SR;
4643 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4644 			flags = PMC_NO_SR;
4645 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4646 			flags = PMC_PARENTAL_SR;
4647 		} else {
4648 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4649 			    "%s property value '%s'", PM_NAME(dip),
4650 			    PM_ADDR(dip), "pm-hardware-state", pp);
4651 		}
4652 		kmem_free(pp, len);
4653 	}
4654 	/*
4655 	 * This next segment (PMC_WANTS_NOTIFY) is in
4656 	 * support of nexus drivers which will want to be involved in
4657 	 * (or at least notified of) their child node's power level transitions.
4658 	 * "pm-want-child-notification?" is defined by the parent.
4659 	 */
4660 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4661 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4662 		flags |= PMC_WANTS_NOTIFY;
4663 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4664 	    dip, propflag, "pm-want-child-notification?"));
4665 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4666 	    "no-involuntary-power-cycles"))
4667 		flags |= PMC_NO_INVOL;
4668 	/*
4669 	 * Is the device a CPU device?
4670 	 */
4671 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4672 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4673 		if (strcmp(pp, "CPU") == 0) {
4674 			flags |= PMC_CPU_DEVICE;
4675 		} else {
4676 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4677 			    "%s property value '%s'", PM_NAME(dip),
4678 			    PM_ADDR(dip), "pm-class", pp);
4679 		}
4680 		kmem_free(pp, len);
4681 	}
4682 	/* devfs single threads us */
4683 	DEVI(dip)->devi_pm_flags |= flags;
4684 }
4685 
4686 /*
4687  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4688  * driver which has claimed a node.
4689  * Sets old_power in arg struct.
4690  */
4691 static int
4692 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4693     ddi_ctl_enum_t ctlop, void *arg, void *result)
4694 {
4695 	_NOTE(ARGUNUSED(dip))
4696 	PMD_FUNC(pmf, "ctlops")
4697 	power_req_t *reqp = (power_req_t *)arg;
4698 	int retval;
4699 	dev_info_t *target_dip;
4700 	int new_level, old_level, cmpt;
4701 #ifdef DEBUG
4702 	char *format;
4703 #endif
4704 
4705 	/*
4706 	 * The interface for doing the actual power level changes is now
4707 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4708 	 * different platform-specific power control drivers.
4709 	 *
4710 	 * This driver implements the "default" version of this interface.
4711 	 * If no ppm driver has been installed then this interface is called
4712 	 * instead.
4713 	 */
4714 	ASSERT(dip == NULL);
4715 	switch (ctlop) {
4716 	case DDI_CTLOPS_POWER:
4717 		switch (reqp->request_type) {
4718 		case PMR_PPM_SET_POWER:
4719 		{
4720 			target_dip = reqp->req.ppm_set_power_req.who;
4721 			ASSERT(target_dip == rdip);
4722 			new_level = reqp->req.ppm_set_power_req.new_level;
4723 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4724 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4725 			old_level = PM_CURPOWER(target_dip, cmpt);
4726 			reqp->req.ppm_set_power_req.old_level = old_level;
4727 			retval = pm_power(target_dip, cmpt, new_level);
4728 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4729 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4730 			    old_level, new_level, (retval == DDI_SUCCESS ?
4731 			    "chd" : "no chg")))
4732 			return (retval);
4733 		}
4734 
4735 		case PMR_PPM_PRE_DETACH:
4736 		case PMR_PPM_POST_DETACH:
4737 		case PMR_PPM_PRE_ATTACH:
4738 		case PMR_PPM_POST_ATTACH:
4739 		case PMR_PPM_PRE_PROBE:
4740 		case PMR_PPM_POST_PROBE:
4741 		case PMR_PPM_PRE_RESUME:
4742 		case PMR_PPM_INIT_CHILD:
4743 		case PMR_PPM_UNINIT_CHILD:
4744 #ifdef DEBUG
4745 			switch (reqp->request_type) {
4746 				case PMR_PPM_PRE_DETACH:
4747 					format = "%s: PMR_PPM_PRE_DETACH "
4748 					    "%s@%s(%s#%d)\n";
4749 					break;
4750 				case PMR_PPM_POST_DETACH:
4751 					format = "%s: PMR_PPM_POST_DETACH "
4752 					    "%s@%s(%s#%d) rets %d\n";
4753 					break;
4754 				case PMR_PPM_PRE_ATTACH:
4755 					format = "%s: PMR_PPM_PRE_ATTACH "
4756 					    "%s@%s(%s#%d)\n";
4757 					break;
4758 				case PMR_PPM_POST_ATTACH:
4759 					format = "%s: PMR_PPM_POST_ATTACH "
4760 					    "%s@%s(%s#%d) rets %d\n";
4761 					break;
4762 				case PMR_PPM_PRE_PROBE:
4763 					format = "%s: PMR_PPM_PRE_PROBE "
4764 					    "%s@%s(%s#%d)\n";
4765 					break;
4766 				case PMR_PPM_POST_PROBE:
4767 					format = "%s: PMR_PPM_POST_PROBE "
4768 					    "%s@%s(%s#%d) rets %d\n";
4769 					break;
4770 				case PMR_PPM_PRE_RESUME:
4771 					format = "%s: PMR_PPM_PRE_RESUME "
4772 					    "%s@%s(%s#%d) rets %d\n";
4773 					break;
4774 				case PMR_PPM_INIT_CHILD:
4775 					format = "%s: PMR_PPM_INIT_CHILD "
4776 					    "%s@%s(%s#%d)\n";
4777 					break;
4778 				case PMR_PPM_UNINIT_CHILD:
4779 					format = "%s: PMR_PPM_UNINIT_CHILD "
4780 					    "%s@%s(%s#%d)\n";
4781 					break;
4782 				default:
4783 					break;
4784 			}
4785 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4786 			    reqp->req.ppm_config_req.result))
4787 #endif
4788 			return (DDI_SUCCESS);
4789 
4790 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4791 			/*
4792 			 * Nothing for us to do
4793 			 */
4794 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4795 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4796 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4797 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4798 			    reqp->req.ppm_notify_level_req.cmpt,
4799 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4800 			    reqp->req.ppm_notify_level_req.cmpt),
4801 			    reqp->req.ppm_notify_level_req.new_level))
4802 			return (DDI_SUCCESS);
4803 
4804 		case PMR_PPM_UNMANAGE:
4805 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4806 			    pmf, PM_DEVICE(rdip)))
4807 			return (DDI_SUCCESS);
4808 
4809 		case PMR_PPM_LOCK_POWER:
4810 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4811 			    reqp->req.ppm_lock_power_req.circp);
4812 			return (DDI_SUCCESS);
4813 
4814 		case PMR_PPM_UNLOCK_POWER:
4815 			pm_unlock_power_single(
4816 			    reqp->req.ppm_unlock_power_req.who,
4817 			    reqp->req.ppm_unlock_power_req.circ);
4818 			return (DDI_SUCCESS);
4819 
4820 		case PMR_PPM_TRY_LOCK_POWER:
4821 			*(int *)result = pm_try_locking_power_single(
4822 			    reqp->req.ppm_lock_power_req.who,
4823 			    reqp->req.ppm_lock_power_req.circp);
4824 			return (DDI_SUCCESS);
4825 
4826 		case PMR_PPM_POWER_LOCK_OWNER:
4827 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4828 			ASSERT(target_dip == rdip);
4829 			reqp->req.ppm_power_lock_owner_req.owner =
4830 			    DEVI(rdip)->devi_busy_thread;
4831 			return (DDI_SUCCESS);
4832 		default:
4833 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4834 			return (DDI_FAILURE);
4835 		}
4836 
4837 	default:
4838 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4839 		return (DDI_FAILURE);
4840 	}
4841 }
4842 
4843 /*
4844  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4845  * power_ops struct for this functionality instead?
4846  * However, we only ever do this on a ppm driver.
4847  */
4848 int
4849 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4850 {
4851 	int (*fp)();
4852 
4853 	/* if no ppm handler, call the default routine */
4854 	if (d == NULL) {
4855 		return (pm_default_ctlops(d, r, op, a, v));
4856 	}
4857 	if (!r)
4858 		return (DDI_FAILURE);
4859 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4860 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4861 
4862 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4863 	return ((*fp)(d, r, op, a, v));
4864 }
4865 
4866 /*
4867  * Called on a node when attach completes or the driver makes its first pm
4868  * call (whichever comes first).
4869  * In the attach case, the device may not be power manageable at all.
4870  * No need to lock the dip; we're single-threaded by the devfs code.
4871  */
4872 static int
4873 pm_start(dev_info_t *dip)
4874 {
4875 	PMD_FUNC(pmf, "start")
4876 	int ret;
4877 	dev_info_t *pdip = ddi_get_parent(dip);
4878 	int e_pm_manage(dev_info_t *, int);
4879 	void pm_noinvol_specd(dev_info_t *dip);
4880 
4881 	e_pm_props(dip);
4882 	pm_noinvol_specd(dip);
4883 	/*
4884 	 * If this dip has already been processed, don't mess with it
4885 	 * (but decrement the speculative count we did above, as whatever
4886 	 * code put it under pm already will have dealt with it)
4887 	 */
4888 	if (PM_GET_PM_INFO(dip)) {
4889 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4890 		    pmf, PM_DEVICE(dip)))
4891 		return (0);
4892 	}
4893 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4894 
4895 	if (PM_GET_PM_INFO(dip) == NULL) {
4896 		/*
4897 		 * keep the kidsupcount increment as is
4898 		 */
4899 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4900 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4901 			pm_hold_power(pdip);
4902 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4903 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4904 			    (void *)dip, NULL, 0);
4905 		}
4906 
4907 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4908 		    "left up\n", pmf, PM_DEVICE(dip)))
4909 	}
4910 
4911 	return (ret);
4912 }
4913 
4914 /*
4915  * Keep a list of recorded thresholds.  For now we just keep a list and
4916  * search it linearly.  We don't expect too many entries.  Can always hash it
4917  * later if we need to.
4918  */
4919 void
4920 pm_record_thresh(pm_thresh_rec_t *rp)
4921 {
4922 	pm_thresh_rec_t *pptr, *ptr;
4923 
4924 	ASSERT(*rp->ptr_physpath);
4925 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4926 	for (pptr = NULL, ptr = pm_thresh_head;
4927 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4928 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4929 			/* replace this one */
4930 			rp->ptr_next = ptr->ptr_next;
4931 			if (pptr) {
4932 				pptr->ptr_next = rp;
4933 			} else {
4934 				pm_thresh_head = rp;
4935 			}
4936 			rw_exit(&pm_thresh_rwlock);
4937 			kmem_free(ptr, ptr->ptr_size);
4938 			return;
4939 		}
4941 	}
4942 	/*
4943 	 * There was not a match in the list, insert this one in front
4944 	 */
4945 	if (pm_thresh_head) {
4946 		rp->ptr_next = pm_thresh_head;
4947 		pm_thresh_head = rp;
4948 	} else {
4949 		rp->ptr_next = NULL;
4950 		pm_thresh_head = rp;
4951 	}
4952 	rw_exit(&pm_thresh_rwlock);
4953 }
4954 
4955 /*
4956  * Create a new dependency record and hang a new dependency entry off of it
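 *
 * The record and both pathname strings live in a single allocation whose
 * total size is remembered in pdr_size; sketching the layout with kept
 * path "k..." and keeper path "K...":
 *
 *	[ pm_pdr_t | "k...\0" | "K...\0" ]
 *	             ^pdr_kept  ^pdr_keeper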
4957  */
4958 pm_pdr_t *
4959 newpdr(char *kept, char *keeps, int isprop)
4960 {
4961 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
4962 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
4963 	p->pdr_size = size;
4964 	p->pdr_isprop = isprop;
4965 	p->pdr_kept_paths = NULL;
4966 	p->pdr_kept_count = 0;
4967 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
4968 	(void) strcpy(p->pdr_kept, kept);
4969 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
4970 	(void) strcpy(p->pdr_keeper, keeps);
4971 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
4972 	    (intptr_t)p + size);
4973 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
4974 	    (intptr_t)p + size);
4975 	return (p);
4976 }
4977 
4978 /*
4979  * Keep a list of recorded dependencies.  We only keep the
4980  * keeper -> kept list for simplification.  At this point we do not
4981  * care whether the devices are attached yet; that is handled by
4982  * pm_keeper() and pm_kept().
4983  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
4984  * and it is up to the user to issue the ioctl again if they want it
4985  * (e.g. pmconfig).
4986  * Returns true if the dependency already exists in the list.
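 *
 * For example (illustrative paths): recording keeper "/pci@0/display@1"
 * for kept "/pci@0/disk@0" means that whenever the keeper powers up, the
 * framework brings the disk up and holds it there (see
 * bring_wekeeps_up() above).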
4987  */
4988 int
4989 pm_record_keeper(char *kept, char *keeper, int isprop)
4990 {
4991 	PMD_FUNC(pmf, "record_keeper")
4992 	pm_pdr_t *npdr, *ppdr, *pdr;
4993 
4994 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
4995 	ASSERT(kept && keeper);
4996 #ifdef DEBUG
4997 	if (pm_debug & PMD_KEEPS)
4998 		prdeps("pm_record_keeper entry");
4999 #endif
5000 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5001 	    ppdr = pdr, pdr = pdr->pdr_next) {
5002 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5003 		    pdr->pdr_keeper))
5004 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5005 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5006 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5007 			return (1);
5008 		}
5009 	}
5010 	/*
5011 	 * We did not find any match, so we have to make an entry
5012 	 */
5013 	npdr = newpdr(kept, keeper, isprop);
5014 	if (ppdr) {
5015 		ASSERT(ppdr->pdr_next == NULL);
5016 		ppdr->pdr_next = npdr;
5017 	} else {
5018 		ASSERT(pm_dep_head == NULL);
5019 		pm_dep_head = npdr;
5020 	}
5021 #ifdef DEBUG
5022 	if (pm_debug & PMD_KEEPS)
5023 		prdeps("pm_record_keeper after new record");
5024 #endif
5025 	if (!isprop)
5026 		pm_unresolved_deps++;
5027 	else
5028 		pm_prop_deps++;
5029 	return (0);
5030 }
5031 
5032 /*
5033  * Look up this device in the set of devices we've seen ioctls for
5034  * to see if we are holding a threshold spec for it.  If so, make it so.
5035  * At ioctl time, we were given the physical path of the device.
5036  */
5037 int
5038 pm_thresh_specd(dev_info_t *dip)
5039 {
5040 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5041 	char *path = 0;
5042 	char pathbuf[MAXNAMELEN];
5043 	pm_thresh_rec_t *rp;
5044 
5045 	path = ddi_pathname(dip, pathbuf);
5046 
5047 	rw_enter(&pm_thresh_rwlock, RW_READER);
5048 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5049 		if (strcmp(rp->ptr_physpath, path) != 0)
5050 			continue;
5051 		pm_apply_recorded_thresh(dip, rp);
5052 		rw_exit(&pm_thresh_rwlock);
5053 		return (1);
5054 	}
5055 	rw_exit(&pm_thresh_rwlock);
5056 	return (0);
5057 }
5058 
5059 static int
5060 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5061 {
5062 	PMD_FUNC(pmf, "set_keeping")
5063 	pm_info_t *kept_info;
5064 	int j, up = 0, circ;
5065 	void prdeps(char *);
5066 
5067 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5068 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5069 #ifdef DEBUG
5070 	if (pm_debug & PMD_KEEPS)
5071 		prdeps("Before PAD\n");
5072 #endif
5073 	ASSERT(keeper != kept);
5074 	if (PM_GET_PM_INFO(keeper) == NULL) {
5075 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5076 		    "%s@%s(%s#%d), but the latter is not power managed",
5077 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5078 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not "
5079 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5080 		return (0);
5081 	}
5082 	kept_info = PM_GET_PM_INFO(kept);
5083 	ASSERT(kept_info);
5084 	PM_LOCK_POWER(keeper, &circ);
5085 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5086 		if (PM_CURPOWER(keeper, j)) {
5087 			up++;
5088 			break;
5089 		}
5090 	}
5091 	if (up) {
5092 		/* Bringup and maintain a hold on the kept */
5093 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5094 		    PM_DEVICE(kept)))
5095 		bring_pmdep_up(kept, 1);
5096 	}
5097 	PM_UNLOCK_POWER(keeper, circ);
5098 #ifdef DEBUG
5099 	if (pm_debug & PMD_KEEPS)
5100 		prdeps("After PAD\n");
5101 #endif
5102 	return (1);
5103 }
5104 
5105 /*
5106  * Should this device keep up another device?
5107  * Look up this device in the set of devices we've seen ioctls for
5108  * to see if we are holding a dependency spec for it.  If so, make it so.
5109  * Because we require the kept device to be attached already in order to
5110  * make the list entry (and hold it), we only need to look for keepers.
5111  * At ioctl time, we were given the physical path of the device.
5112  */
5113 int
5114 pm_keeper(char *keeper)
5115 {
5116 	PMD_FUNC(pmf, "keeper")
5117 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5118 	dev_info_t *dip;
5119 	pm_pdr_t *dp;
5120 	dev_info_t *kept = NULL;
5121 	int ret = 0;
5122 	int i;
5123 
5124 	if (!pm_unresolved_deps && !pm_prop_deps)
5125 		return (0);
5126 	ASSERT(keeper != NULL);
5127 	dip = pm_name_to_dip(keeper, 1);
5128 	if (dip == NULL)
5129 		return (0);
5130 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5131 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5132 		if (!dp->pdr_isprop) {
5133 			if (!pm_unresolved_deps)
5134 				continue;
5135 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5136 			if (dp->pdr_satisfied) {
5137 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5138 				continue;
5139 			}
5140 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5141 				ret += pm_apply_recorded_dep(dip, dp);
5142 			}
5143 		} else {
5144 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5145 				continue;
5146 			for (i = 0; i < dp->pdr_kept_count; i++) {
5147 				if (dp->pdr_kept_paths[i] == NULL)
5148 					continue;
5149 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5150 				if (kept == NULL)
5151 					continue;
5152 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5153 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5154 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5155 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5156 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5157 				    dp->pdr_kept_count))
5158 				if (kept != dip) {
5159 					ret += pm_set_keeping(dip, kept);
5160 				}
5161 				ddi_release_devi(kept);
5162 			}
5163 
5164 		}
5165 	}
5166 	ddi_release_devi(dip);
5167 	return (ret);
5168 }
5169 
5170 /*
5171  * Should this device be kept up by another device?
5172  * Look up all dependencies recorded by the PM_ADD_DEPENDENT and
5173  * PM_ADD_DEPENDENT_PROPERTY ioctls and record this device on the
5174  * keeper's kept-device lists.
5175  */
5176 static int
5177 pm_kept(char *keptp)
5178 {
5179 	PMD_FUNC(pmf, "kept")
5180 	pm_pdr_t *dp;
5181 	int found = 0;
5182 	int ret = 0;
5183 	dev_info_t *keeper;
5184 	dev_info_t *kept;
5185 	size_t length;
5186 	int i;
5187 	char **paths;
5188 	char *path;
5189 
5190 	ASSERT(keptp != NULL);
5191 	kept = pm_name_to_dip(keptp, 1);
5192 	if (kept == NULL)
5193 		return (0);
5194 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5195 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5196 		if (dp->pdr_isprop) {
5197 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5198 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5199 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5200 				/*
5201 				 * Don't allow self-dependency.
5202 				 */
5203 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5204 					continue;
5205 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5206 				if (keeper == NULL)
5207 					continue;
5208 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5209 				    "%p\n", pmf, (void *)kept))
5210 #ifdef DEBUG
5211 				if (pm_debug & PMD_DEP)
5212 					prdeps("Before Adding from pm_kept\n");
5213 #endif
5214 				/*
5215 				 * Add ourselves to the dip list.
5216 				 */
5217 				if (dp->pdr_kept_count == 0) {
5218 					length = strlen(keptp) + 1;
5219 					path =
5220 					    kmem_alloc(length, KM_SLEEP);
5221 					paths = kmem_alloc(sizeof (char **),
5222 					    KM_SLEEP);
5223 					(void) strcpy(path, keptp);
5224 					paths[0] = path;
5225 					dp->pdr_kept_paths = paths;
5226 					dp->pdr_kept_count++;
5227 				} else {
5228 					/* Check to see if already on list */
5229 					for (i = 0; i < dp->pdr_kept_count;
5230 					    i++) {
5231 						if (strcmp(keptp,
5232 						    dp->pdr_kept_paths[i])
5233 						    == 0) {
5234 							found++;
5235 							break;
5236 						}
5237 					}
5238 					if (found) {
5239 						ddi_release_devi(keeper);
5240 						continue;
5241 					}
5242 					length = dp->pdr_kept_count *
5243 					    sizeof (char **);
5244 					paths = kmem_alloc(
5245 					    length + sizeof (char **),
5246 					    KM_SLEEP);
5247 					if (dp->pdr_kept_count) {
5248 						bcopy(dp->pdr_kept_paths,
5249 						    paths, length);
5250 						kmem_free(dp->pdr_kept_paths,
5251 						    length);
5252 					}
5253 					dp->pdr_kept_paths = paths;
5254 					length = strlen(keptp) + 1;
5255 					path =
5256 					    kmem_alloc(length, KM_SLEEP);
5257 					(void) strcpy(path, keptp);
5258 					dp->pdr_kept_paths[i] = path;
5259 					dp->pdr_kept_count++;
5260 				}
5261 #ifdef DEBUG
5262 				if (pm_debug & PMD_DEP)
5263 					prdeps("After from pm_kept\n");
5264 #endif
5265 				if (keeper) {
5266 					ret += pm_set_keeping(keeper, kept);
5267 					ddi_release_devi(keeper);
5268 				}
5269 			}
5270 		} else {
5271 			/*
5272 			 * pm_keeper would be called later to do
5273 			 * the actual pm_set_keeping.
5274 			 */
5275 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5276 			    pmf, (void *)kept))
5277 #ifdef DEBUG
5278 			if (pm_debug & PMD_DEP)
5279 				prdeps("Before Adding from pm_kept\n");
5280 #endif
5281 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5282 				if (dp->pdr_kept_paths == NULL) {
5283 					length = strlen(keptp) + 1;
5284 					path =
5285 					    kmem_alloc(length, KM_SLEEP);
5286 					paths = kmem_alloc(sizeof (char **),
5287 					    KM_SLEEP);
5288 					(void) strcpy(path, keptp);
5289 					paths[0] = path;
5290 					dp->pdr_kept_paths = paths;
5291 					dp->pdr_kept_count++;
5292 				}
5293 			}
5294 #ifdef DEBUG
5295 			if (pm_debug & PMD_DEP)
5296 				prdeps("After from pm_kept\n");
5297 #endif
5298 		}
5299 	}
5300 	ddi_release_devi(kept);
5301 	return (ret);
5302 }
5303 
5304 /*
5305  * Apply a recorded dependency.  dp specifies the dependency, and
5306  * keeper is already known to be the device that keeps up the other (kept) one.
5307  * We have to search the whole tree for the "kept" device, then apply
5308  * the dependency (which may already be applied).
5309  */
5310 int
5311 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5312 {
5313 	PMD_FUNC(pmf, "apply_recorded_dep")
5314 	dev_info_t *kept = NULL;
5315 	int ret = 0;
5316 	char *keptp = NULL;
5317 
5318 	/*
5319 	 * Device to Device dependency can only be 1 to 1.
5320 	 */
5321 	if (dp->pdr_kept_paths == NULL)
5322 		return (0);
5323 	keptp = dp->pdr_kept_paths[0];
5324 	if (keptp == NULL)
5325 		return (0);
5326 	ASSERT(*keptp != '\0');
5327 	kept = pm_name_to_dip(keptp, 1);
5328 	if (kept == NULL)
5329 		return (0);
5330 	if (kept) {
5331 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5332 		    dp->pdr_keeper, keptp))
5333 		if (pm_set_keeping(keeper, kept)) {
5334 			ASSERT(dp->pdr_satisfied == 0);
5335 			dp->pdr_satisfied = 1;
5336 			ASSERT(pm_unresolved_deps);
5337 			pm_unresolved_deps--;
5338 			ret++;
5339 		}
5340 	}
5341 	ddi_release_devi(kept);
5342 
5343 	return (ret);
5344 }
5345 
5346 /*
5347  * Called from common/io/pm.c
5348  */
5349 int
5350 pm_cur_power(pm_component_t *cp)
5351 {
5352 	return (cur_power(cp));
5353 }
5354 
5355 /*
5356  * External interface to sanity-check a power level.
5357  */
5358 int
5359 pm_valid_power(dev_info_t *dip, int comp, int level)
5360 {
5361 	PMD_FUNC(pmf, "valid_power")
5362 
5363 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5364 		return (e_pm_valid_power(dip, comp, level));
5365 	else {
5366 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5367 		    pmf, comp, PM_NUMCMPTS(dip), level))
5368 		return (0);
5369 	}
5370 }
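
/*
 * An illustrative (hypothetical) use of the check above: a caller can
 * vet a level before requesting it from the framework, e.g.
 *
 *	if (pm_valid_power(dip, comp, level))
 *		(void) pm_raise_power(dip, comp, level);
 */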
5371 
5372 /*
5373  * Called when a device that is direct power managed needs to change state.
5374  * This routine arranges to block the request until the process managing
5375  * the device makes the change (or some other incompatible change) or
5376  * the process closes /dev/pm.
5377  */
5378 static int
5379 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5380 {
5381 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5382 	int ret = 0;
5383 	void pm_dequeue_blocked(pm_rsvp_t *);
5384 	void pm_enqueue_blocked(pm_rsvp_t *);
5385 
5386 	ASSERT(!pm_processes_stopped);
5387 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5388 	new->pr_dip = dip;
5389 	new->pr_comp = comp;
5390 	new->pr_newlevel = newpower;
5391 	new->pr_oldlevel = oldpower;
5392 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5393 	mutex_enter(&pm_rsvp_lock);
5394 	pm_enqueue_blocked(new);
5395 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5396 	    PM_CANBLOCK_BLOCK);
5397 	PM_UNLOCK_DIP(dip);
5398 	/*
5399 	 * truss may make the cv_wait_sig return prematurely
5400 	 */
5401 	while (ret == 0) {
5402 		/*
5403 		 * Normally there will be no user context involved, but if
5404 		 * there is (e.g. we are here via an ioctl call to a driver)
5405 		 * then we should allow the process to abort the request,
5406 		 * or we get an unkillable process if the same thread does
5407 		 * PM_DIRECT_PM and pm_raise_power
5408 		 */
5409 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5410 			ret = PMP_FAIL;
5411 		} else {
5412 			ret = new->pr_retval;
5413 		}
5414 	}
5415 	pm_dequeue_blocked(new);
5416 	mutex_exit(&pm_rsvp_lock);
5417 	cv_destroy(&new->pr_cv);
5418 	kmem_free(new, sizeof (*new));
5419 	return (ret);
5420 }
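
/*
 * The wakeup half of the rendezvous above is performed elsewhere in the
 * framework when the controlling process acts on (or abandons) the
 * request.  A minimal sketch of that completion, using only the fields
 * set up in pm_block():
 *
 *	mutex_enter(&pm_rsvp_lock);
 *	p->pr_retval = PMP_FAIL;	(or another PMP_* result code)
 *	cv_signal(&p->pr_cv);
 *	mutex_exit(&pm_rsvp_lock);
 */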
5421 
5422 /*
5423  * Returns true if the process is interested in power level changes (has issued
5424  * PM_GET_STATE_CHANGE ioctl).
5425  */
5426 int
5427 pm_interest_registered(int clone)
5428 {
5429 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5430 	return (pm_interest[clone]);
5431 }
5432 
5433 /*
5434  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5435  * watch all state transitions (dip == NULL).  Set up data
5436  * structs to communicate with process about state changes.
5437  */
5438 void
5439 pm_register_watcher(int clone, dev_info_t *dip)
5440 {
5441 	pscc_t	*p;
5442 	psce_t	*psce;
5443 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5444 
5445 	/*
5446 	 * We definitely need a control struct, then we have to search to see
5447 	 * if there is already an entries struct (in the dip != NULL case).
5448 	 */
5449 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5450 	pscc->pscc_clone = clone;
5451 	pscc->pscc_dip = dip;
5452 
5453 	if (dip) {
5454 		int found = 0;
5455 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5456 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5457 			/*
5458 			 * Already an entry for this clone, so just use it
5459 			 * for the new one (for the case where a single
5460 			 * process is watching multiple devices)
5461 			 */
5462 			if (p->pscc_clone == clone) {
5463 				ASSERT(p->pscc_dip != dip);
5464 				pscc->pscc_entries = p->pscc_entries;
5465 				pscc->pscc_entries->psce_references++;
5466 				found++;
5467 			}
5468 		}
5469 		if (!found) {		/* create a new one */
5470 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5471 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5472 			psce->psce_first =
5473 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5474 			    KM_SLEEP);
5475 			psce->psce_in = psce->psce_out = psce->psce_first;
5476 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5477 			psce->psce_references = 1;
5478 			pscc->pscc_entries = psce;
5479 		}
5480 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5481 		rw_exit(&pm_pscc_direct_rwlock);
5482 	} else {
5483 		ASSERT(!pm_interest_registered(clone));
5484 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5485 #ifdef DEBUG
5486 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5487 			/*
5488 			 * Should not be an entry for this clone!
5489 			 */
5490 			ASSERT(p->pscc_clone != clone);
5491 		}
5492 #endif
5493 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5494 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5495 		    PSCCOUNT, KM_SLEEP);
5496 		psce->psce_in = psce->psce_out = psce->psce_first;
5497 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5498 		psce->psce_references = 1;
5499 		pscc->pscc_entries = psce;
5500 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5501 		pm_interest[clone] = 1;
5502 		rw_exit(&pm_pscc_interest_rwlock);
5503 	}
5504 }
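
/*
 * The entries array allocated above behaves as a ring buffer: psce_in
 * is where the producer deposits the next pm_state_change_t and
 * psce_out is where the watching process drains.  A minimal sketch of
 * the wrap-around advance (the actual producer lives elsewhere in the
 * framework):
 *
 *	if (psce->psce_in == psce->psce_last)
 *		psce->psce_in = psce->psce_first;
 *	else
 *		psce->psce_in++;
 */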
5505 
5506 /*
5507  * Remove the given entry from the blocked list
5508  */
5509 void
5510 pm_dequeue_blocked(pm_rsvp_t *p)
5511 {
5512 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5513 	if (pm_blocked_list == p) {
5514 		ASSERT(p->pr_prev == NULL);
5515 		if (p->pr_next != NULL)
5516 			p->pr_next->pr_prev = NULL;
5517 		pm_blocked_list = p->pr_next;
5518 	} else {
5519 		ASSERT(p->pr_prev != NULL);
5520 		p->pr_prev->pr_next = p->pr_next;
5521 		if (p->pr_next != NULL)
5522 			p->pr_next->pr_prev = p->pr_prev;
5523 	}
5524 }
5525 
5526 /*
5527  * Remove the given control struct from the given list
5528  */
5529 static void
5530 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5531 {
5532 	if (*list == p) {
5533 		ASSERT(p->pscc_prev == NULL);
5534 		if (p->pscc_next != NULL)
5535 			p->pscc_next->pscc_prev = NULL;
5536 		*list = p->pscc_next;
5537 	} else {
5538 		ASSERT(p->pscc_prev != NULL);
5539 		p->pscc_prev->pscc_next = p->pscc_next;
5540 		if (p->pscc_next != NULL)
5541 			p->pscc_next->pscc_prev = p->pscc_prev;
5542 	}
5543 }
5544 
5545 /*
5546  * Stick the control struct specified on the front of the list
5547  */
5548 static void
5549 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5550 {
5551 	pscc_t *h;	/* entry at head of list */
5552 	if ((h = *list) == NULL) {
5553 		*list = p;
5554 		ASSERT(p->pscc_next == NULL);
5555 		ASSERT(p->pscc_prev == NULL);
5556 	} else {
5557 		p->pscc_next = h;
5558 		ASSERT(h->pscc_prev == NULL);
5559 		h->pscc_prev = p;
5560 		ASSERT(p->pscc_prev == NULL);
5561 		*list = p;
5562 	}
5563 }
5564 
5565 /*
5566  * If dip is NULL, the process is closing "clone"; clean up all its
5567  * registrations.  Otherwise clean up only those for dip, because the
5568  * process is just giving up control of a direct device.
5569  */
5570 void
5571 pm_deregister_watcher(int clone, dev_info_t *dip)
5572 {
5573 	pscc_t	*p, *pn;
5574 	psce_t	*psce;
5575 	int found = 0;
5576 
5577 	if (dip == NULL) {
5578 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5579 		for (p = pm_pscc_interest; p; p = pn) {
5580 			pn = p->pscc_next;
5581 			if (p->pscc_clone == clone) {
5582 				pm_dequeue_pscc(p, &pm_pscc_interest);
5583 				psce = p->pscc_entries;
5584 				ASSERT(psce->psce_references == 1);
5585 				mutex_destroy(&psce->psce_lock);
5586 				kmem_free(psce->psce_first,
5587 				    sizeof (pm_state_change_t) * PSCCOUNT);
5588 				kmem_free(psce, sizeof (*psce));
5589 				kmem_free(p, sizeof (*p));
5590 			}
5591 		}
5592 		pm_interest[clone] = 0;
5593 		rw_exit(&pm_pscc_interest_rwlock);
5594 	}
5595 	found = 0;
5596 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5597 	for (p = pm_pscc_direct; p; p = pn) {
5598 		pn = p->pscc_next;
5599 		if ((dip && p->pscc_dip == dip) ||
5600 		    (dip == NULL && clone == p->pscc_clone)) {
5601 			ASSERT(clone == p->pscc_clone);
5602 			found++;
5603 			/*
5604 			 * Remove from control list
5605 			 */
5606 			pm_dequeue_pscc(p, &pm_pscc_direct);
5607 			/*
5608 			 * If we're the last reference, free the
5609 			 * entries struct.
5610 			 */
5611 			psce = p->pscc_entries;
5612 			ASSERT(psce);
5613 			if (psce->psce_references == 1) {
5614 				kmem_free(psce->psce_first,
5615 				    PSCCOUNT * sizeof (pm_state_change_t));
5616 				kmem_free(psce, sizeof (*psce));
5617 			} else {
5618 				psce->psce_references--;
5619 			}
5620 			kmem_free(p, sizeof (*p));
5621 		}
5622 	}
5623 	ASSERT(dip == NULL || found);
5624 	rw_exit(&pm_pscc_direct_rwlock);
5625 }
5626 
5627 /*
5628  * Search the indicated list for an entry that matches clone, and return a
5629  * pointer to it.  To be interesting, the entry must have something ready to
5630  * be passed up to the controlling process.
5631  * The returned entry will be locked upon return from this call.
5632  */
5633 static psce_t *
5634 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5635 {
5636 	pscc_t	*p;
5637 	psce_t	*psce;
5638 	rw_enter(lock, RW_READER);
5639 	for (p = *list; p; p = p->pscc_next) {
5640 		if (clone == p->pscc_clone) {
5641 			psce = p->pscc_entries;
5642 			mutex_enter(&psce->psce_lock);
5643 			if (psce->psce_out->size) {
5644 				rw_exit(lock);
5645 				return (psce);
5646 			} else {
5647 				mutex_exit(&psce->psce_lock);
5648 			}
5649 		}
5650 	}
5651 	rw_exit(lock);
5652 	return (NULL);
5653 }
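
/*
 * Since pm_psc_find_clone() returns with psce_lock held, a consumer is
 * expected to drop the lock once the ready entries have been drained;
 * an illustrative sketch:
 *
 *	if ((psce = pm_psc_clone_to_direct(clone)) != NULL) {
 *		(copy out entries, starting at psce->psce_out)
 *		mutex_exit(&psce->psce_lock);
 *	}
 */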
5654 
5655 /*
5656  * Find an entry for a particular clone in the direct list.
5657  */
5658 psce_t *
5659 pm_psc_clone_to_direct(int clone)
5660 {
5661 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5662 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5663 	    &pm_pscc_direct_rwlock));
5664 }
5665 
5666 /*
5667  * Find an entry for a particular clone in the interest list.
5668  */
5669 psce_t *
5670 pm_psc_clone_to_interest(int clone)
5671 {
5672 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5673 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5674 	    &pm_pscc_interest_rwlock));
5675 }
5676 
5677 /*
5678  * Put the given entry at the head of the blocked list
5679  */
5680 void
5681 pm_enqueue_blocked(pm_rsvp_t *p)
5682 {
5683 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5684 	ASSERT(p->pr_next == NULL);
5685 	ASSERT(p->pr_prev == NULL);
5686 	if (pm_blocked_list != NULL) {
5687 		p->pr_next = pm_blocked_list;
5688 		ASSERT(pm_blocked_list->pr_prev == NULL);
5689 		pm_blocked_list->pr_prev = p;
5690 		pm_blocked_list = p;
5691 	} else {
5692 		pm_blocked_list = p;
5693 	}
5694 }
5695 
5696 /*
5697  * Sets every power managed device back to its default threshold
5698  */
5699 void
5700 pm_all_to_default_thresholds(void)
5701 {
5702 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5703 	    (void *) &pm_system_idle_threshold);
5704 }
5705 
5706 static int
5707 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5708 {
5709 	int thr = (int)(*(int *)arg);
5710 
5711 	if (!PM_GET_PM_INFO(dip))
5712 		return (DDI_WALK_CONTINUE);
5713 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5714 	return (DDI_WALK_CONTINUE);
5715 }
5716 
5717 /*
5718  * Returns the current threshold value (in seconds) for the indicated component
5719  */
5720 int
5721 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5722 {
5723 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5724 		return (DDI_FAILURE);
5725 	} else {
5726 		*threshp = cur_threshold(dip, comp);
5727 		return (DDI_SUCCESS);
5728 	}
5729 }
5730 
5731 /*
5732  * To be called when changing the power level of a component of a device.
5733  * On some platforms, changing power on one device may require that power
5734  * be changed on other, related devices in the same transaction.  Thus, we
5735  * always pass this request to the platform power manager so that all the
5736  * affected devices will be locked.
5737  */
5738 void
5739 pm_lock_power(dev_info_t *dip, int *circp)
5740 {
5741 	power_req_t power_req;
5742 	int result;
5743 
5744 	power_req.request_type = PMR_PPM_LOCK_POWER;
5745 	power_req.req.ppm_lock_power_req.who = dip;
5746 	power_req.req.ppm_lock_power_req.circp = circp;
5747 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5748 }
5749 
5750 /*
5751  * Release the lock (or locks) acquired to change the power of a device.
5752  * See comments for pm_lock_power.
5753  */
5754 void
5755 pm_unlock_power(dev_info_t *dip, int circ)
5756 {
5757 	power_req_t power_req;
5758 	int result;
5759 
5760 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5761 	power_req.req.ppm_unlock_power_req.who = dip;
5762 	power_req.req.ppm_unlock_power_req.circ = circ;
5763 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5764 }
5765 
5766 
5767 /*
5768  * Attempt (without blocking) to acquire the lock(s) needed to change the
5769  * power of a component of a device.  See comments for pm_lock_power.
5770  *
5771  * Return: 1 if lock(s) acquired, 0 if not.
5772  */
5773 int
5774 pm_try_locking_power(dev_info_t *dip, int *circp)
5775 {
5776 	power_req_t power_req;
5777 	int result;
5778 
5779 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5780 	power_req.req.ppm_lock_power_req.who = dip;
5781 	power_req.req.ppm_lock_power_req.circp = circp;
5782 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5783 	return (result);
5784 }
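
/*
 * An illustrative pairing of the non-blocking lock with its release
 * (hypothetical caller):
 *
 *	int circ;
 *
 *	if (pm_try_locking_power(dip, &circ)) {
 *		(change component power here)
 *		pm_unlock_power(dip, circ);
 *	} else {
 *		(defer or queue the request)
 *	}
 */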
5785 
5786 
5787 /*
5788  * Lock power state of a device.
5789  *
5790  * The implementation handles a special case where another thread may have
5791  * acquired the lock and created/launched this thread to do the work.  If
5792  * the lock cannot be acquired immediately, we check to see if this thread
5793  * is registered as a borrower of the lock.  If so, we may proceed without
5794  * the lock.  This assumes that the lending thread blocks on the completion
5795  * of this thread.
5796  *
5797  * Note 1: for use by ppm only.
5798  *
5799  * Note 2: On failing to get the lock immediately, we search lock_loan list
5800  * for curthread (as borrower of the lock).  On a hit, we check that the
5801  * lending thread already owns the lock we want.  It is safe to compare
5802  * devi_busy_thread and thread id of the lender because in the == case (the
5803  * only one we care about) we know that the owner is blocked.  Similarly,
5804  * If we find that curthread isn't registered as a lock borrower, it is safe
5805  * to use the blocking call (ndi_devi_enter) because we know that if we
5806  * weren't already listed as a borrower (upstream on the call stack) we won't
5807  * become one.
5808  */
5809 void
5810 pm_lock_power_single(dev_info_t *dip, int *circp)
5811 {
5812 	lock_loan_t *cur;
5813 
5814 	/* if the lock is available, we are done. */
5815 	if (ndi_devi_tryenter(dip, circp))
5816 		return;
5817 
5818 	mutex_enter(&pm_loan_lock);
5819 	/* see if our thread is registered as a lock borrower. */
5820 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5821 		if (cur->pmlk_borrower == curthread)
5822 			break;
5823 	mutex_exit(&pm_loan_lock);
5824 
5825 	/* if this thread is not already registered, it is safe to block */
5826 	if (cur == NULL)
5827 		ndi_devi_enter(dip, circp);
5828 	else {
5829 		/* registered: does lender own the lock we want? */
5830 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5831 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5832 			cur->pmlk_dip = dip;
5833 		} else /* no: just block for it */
5834 			ndi_devi_enter(dip, circp);
5835 
5836 	}
5837 }
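
/*
 * A minimal sketch of the loan registration pm_lock_power_single()
 * relies on.  The lending thread, which already owns the dip lock,
 * links a lock_loan_t onto lock_loan_head before launching the
 * borrower; the registration itself is done elsewhere in the framework
 * (worker_thread below is the hypothetical thread created to do the
 * work):
 *
 *	loan->pmlk_lender = curthread;
 *	loan->pmlk_borrower = worker_thread;
 *	loan->pmlk_dip = NULL;
 *	mutex_enter(&pm_loan_lock);
 *	loan->pmlk_next = lock_loan_head.pmlk_next;
 *	lock_loan_head.pmlk_next = loan;
 *	mutex_exit(&pm_loan_lock);
 */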
5838 
5839 /*
5840  * Drop the lock on the device's power state.  See comment for
5841  * pm_lock_power_single() for special implementation considerations.
5842  *
5843  * Note: for use by ppm only.
5844  */
5845 void
5846 pm_unlock_power_single(dev_info_t *dip, int circ)
5847 {
5848 	lock_loan_t *cur;
5849 
5850 	/* optimization: mutex not needed to check empty list */
5851 	if (lock_loan_head.pmlk_next == NULL) {
5852 		ndi_devi_exit(dip, circ);
5853 		return;
5854 	}
5855 
5856 	mutex_enter(&pm_loan_lock);
5857 	/* see if our thread is registered as a lock borrower. */
5858 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5859 		if (cur->pmlk_borrower == curthread)
5860 			break;
5861 	mutex_exit(&pm_loan_lock);
5862 
5863 	if (cur == NULL || cur->pmlk_dip != dip)
5864 		/* we acquired the lock directly, so return it */
5865 		ndi_devi_exit(dip, circ);
5866 }
5867 
5868 /*
5869  * Try to take the lock for changing the power level of a component.
5870  *
5871  * Note: for use by ppm only.
5872  */
5873 int
5874 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5875 {
5876 	return (ndi_devi_tryenter(dip, circp));
5877 }
5878 
5879 #ifdef	DEBUG
5880 /*
5881  * The following are used only to print out data structures for debugging
5882  */
5883 void
5884 prdeps(char *msg)
5885 {
5886 
5887 	pm_pdr_t *rp;
5888 	int i;
5889 
5890 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5891 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5892 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5893 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5894 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5895 		    (void *)rp->pdr_next);
5896 		if (rp->pdr_kept_count != 0) {
5897 			pm_log("kept list = ");
5898 			i = 0;
5899 			while (i < rp->pdr_kept_count) {
5900 				pm_log("%s ", rp->pdr_kept_paths[i]);
5901 				i++;
5902 			}
5903 			pm_log("\n");
5904 		}
5905 	}
5906 }
5907 
5908 void
5909 pr_noinvol(char *hdr)
5910 {
5911 	pm_noinvol_t *ip;
5912 
5913 	pm_log("%s\n", hdr);
5914 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5915 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5916 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5917 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5918 	rw_exit(&pm_noinvol_rwlock);
5919 }
5920 #endif
5921 
5922 /*
5923  * Attempt to apply the thresholds indicated by rp to the node specified by
5924  * dip.
5925  */
5926 void
5927 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5928 {
5929 	PMD_FUNC(pmf, "apply_recorded_thresh")
5930 	int i, j;
5931 	int comps = PM_NUMCMPTS(dip);
5932 	struct pm_component *cp;
5933 	pm_pte_t *ep;
5934 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5935 
5936 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5937 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5938 	PM_LOCK_DIP(dip);
5939 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5940 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5941 		    pmf, PM_DEVICE(dip), (void *)PM_GET_PM_INFO(dip)))
5942 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5943 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5944 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5945 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5946 		PM_UNLOCK_DIP(dip);
5947 		return;
5948 	}
5949 
5950 	ep = rp->ptr_entries;
5951 	/*
5952 	 * Here we do the special case of a device threshold
5953 	 */
5954 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
5955 		ASSERT(ep && ep->pte_numthresh == 1);
5956 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
5957 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
5958 		PM_UNLOCK_DIP(dip);
5959 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
5960 		if (PM_SCANABLE(dip))
5961 			pm_rescan(dip);
5962 		return;
5963 	}
5964 	for (i = 0; i < comps; i++) {
5965 		cp = PM_CP(dip, i);
5966 		for (j = 0; j < ep->pte_numthresh; j++) {
5967 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
5968 			    "to %x\n", pmf, j, PM_DEVICE(dip),
5969 			    i, ep->pte_thresh[j]))
5970 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
5971 		}
5972 		ep++;
5973 	}
5974 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
5975 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
5976 	PM_UNLOCK_DIP(dip);
5977 
5978 	if (PM_SCANABLE(dip))
5979 		pm_rescan(dip);
5980 }
5981 
5982 /*
5983  * Returns true if the threshold specified by rp could be applied to dip
5984  * (that is, the number of components and transitions are the same)
5985  */
5986 int
5987 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5988 {
5989 	PMD_FUNC(pmf, "valid_thresh")
5990 	int comps, i;
5991 	pm_component_t *cp;
5992 	pm_pte_t *ep;
5993 
5994 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
5995 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
5996 		    rp->ptr_physpath))
5997 		return (0);
5998 	}
5999 	/*
6000 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6001 	 * an entry with numcomps == 0, (since we don't know how many
6002 	 * components there are in advance).  This is always a valid
6003 	 * spec.
6004 	 */
6005 	if (rp->ptr_numcomps == 0) {
6006 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6007 		return (1);
6008 	}
6009 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6010 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6011 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6012 		return (0);
6013 	}
6014 	ep = rp->ptr_entries;
6015 	for (i = 0; i < comps; i++) {
6016 		cp = PM_CP(dip, i);
6017 		if ((ep + i)->pte_numthresh !=
6018 		    cp->pmc_comp.pmc_numlevels - 1) {
6019 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6020 			    pmf, rp->ptr_physpath, i,
6021 			    cp->pmc_comp.pmc_numlevels - 1,
6022 			    (ep + i)->pte_numthresh))
6023 			return (0);
6024 		}
6025 	}
6026 	return (1);
6027 }
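
/*
 * Example of the check above: a component with power levels {0, 1, 2}
 * has pmc_numlevels == 3, so a matching record must carry
 * pte_numthresh == 2, one threshold per upward transition.
 */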
6028 
6029 /*
6030  * Remove any recorded threshold for device physpath
6031  * We know there will be at most one.
6032  */
6033 void
6034 pm_unrecord_threshold(char *physpath)
6035 {
6036 	pm_thresh_rec_t *pptr, *ptr;
6037 
6038 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6039 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6040 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6041 			if (pptr) {
6042 				pptr->ptr_next = ptr->ptr_next;
6043 			} else {
6044 				ASSERT(pm_thresh_head == ptr);
6045 				pm_thresh_head = ptr->ptr_next;
6046 			}
6047 			kmem_free(ptr, ptr->ptr_size);
6048 			break;
6049 		}
6050 		pptr = ptr;
6051 	}
6052 	rw_exit(&pm_thresh_rwlock);
6053 }
6054 
6055 /*
6056  * Discard all recorded thresholds.  We are returning to the default pm state.
6057  */
6058 void
6059 pm_discard_thresholds(void)
6060 {
6061 	pm_thresh_rec_t *rp;
6062 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6063 	while (pm_thresh_head) {
6064 		rp = pm_thresh_head;
6065 		pm_thresh_head = rp->ptr_next;
6066 		kmem_free(rp, rp->ptr_size);
6067 	}
6068 	rw_exit(&pm_thresh_rwlock);
6069 }
6070 
6071 /*
6072  * Discard all recorded dependencies.  We are returning to the default pm state.
6073  */
6074 void
6075 pm_discard_dependencies(void)
6076 {
6077 	pm_pdr_t *rp;
6078 	int i;
6079 	size_t length;
6080 
6081 #ifdef DEBUG
6082 	if (pm_debug & PMD_DEP)
6083 		prdeps("Before discard\n");
6084 #endif
6085 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6086 
6087 #ifdef DEBUG
6088 	if (pm_debug & PMD_DEP)
6089 		prdeps("After discard\n");
6090 #endif
6091 	while (pm_dep_head) {
6092 		rp = pm_dep_head;
6093 		if (!rp->pdr_isprop) {
6094 			ASSERT(rp->pdr_satisfied == 0);
6095 			ASSERT(pm_unresolved_deps);
6096 			pm_unresolved_deps--;
6097 		} else {
6098 			ASSERT(pm_prop_deps);
6099 			pm_prop_deps--;
6100 		}
6101 		pm_dep_head = rp->pdr_next;
6102 		if (rp->pdr_kept_count)  {
6103 			for (i = 0; i < rp->pdr_kept_count; i++) {
6104 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6105 				kmem_free(rp->pdr_kept_paths[i], length);
6106 			}
6107 			kmem_free(rp->pdr_kept_paths,
6108 			    rp->pdr_kept_count * sizeof (char **));
6109 		}
6110 		kmem_free(rp, rp->pdr_size);
6111 	}
6112 }
6113 
6114 
6115 static int
6116 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6117 {
6118 	_NOTE(ARGUNUSED(arg))
6119 	char *pathbuf;
6120 
6121 	if (PM_GET_PM_INFO(dip) == NULL)
6122 		return (DDI_WALK_CONTINUE);
6123 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6124 	(void) ddi_pathname(dip, pathbuf);
6125 	pm_free_keeper(pathbuf, 0);
6126 	kmem_free(pathbuf, MAXPATHLEN);
6127 	return (DDI_WALK_CONTINUE);
6128 }
6129 
6130 static int
6131 pm_kept_walk(dev_info_t *dip, void *arg)
6132 {
6133 	_NOTE(ARGUNUSED(arg))
6134 	char *pathbuf;
6135 
6136 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6137 	(void) ddi_pathname(dip, pathbuf);
6138 	(void) pm_kept(pathbuf);
6139 	kmem_free(pathbuf, MAXPATHLEN);
6140 
6141 	return (DDI_WALK_CONTINUE);
6142 }
6143 
6144 static int
6145 pm_keeper_walk(dev_info_t *dip, void *arg)
6146 {
6147 	_NOTE(ARGUNUSED(arg))
6148 	char *pathbuf;
6149 
6150 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6151 	(void) ddi_pathname(dip, pathbuf);
6152 	(void) pm_keeper(pathbuf);
6153 	kmem_free(pathbuf, MAXPATHLEN);
6154 
6155 	return (DDI_WALK_CONTINUE);
6156 }
6157 
6158 static char *
6159 pdw_type_decode(int type)
6160 {
6161 	switch (type) {
6162 	case PM_DEP_WK_POWER_ON:
6163 		return ("power on");
6164 	case PM_DEP_WK_POWER_OFF:
6165 		return ("power off");
6166 	case PM_DEP_WK_DETACH:
6167 		return ("detach");
6168 	case PM_DEP_WK_REMOVE_DEP:
6169 		return ("remove dep");
6170 	case PM_DEP_WK_BRINGUP_SELF:
6171 		return ("bringup self");
6172 	case PM_DEP_WK_RECORD_KEEPER:
6173 		return ("add dependent");
6174 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6175 		return ("add dependent property");
6176 	case PM_DEP_WK_KEPT:
6177 		return ("kept");
6178 	case PM_DEP_WK_KEEPER:
6179 		return ("keeper");
6180 	case PM_DEP_WK_ATTACH:
6181 		return ("attach");
6182 	case PM_DEP_WK_CHECK_KEPT:
6183 		return ("check kept");
6184 	case PM_DEP_WK_CPR_SUSPEND:
6185 		return ("suspend");
6186 	case PM_DEP_WK_CPR_RESUME:
6187 		return ("resume");
6188 	default:
6189 		return ("unknown");
6190 	}
6191 
6192 }
6193 
6194 static void
6195 pm_rele_dep(char *keeper)
6196 {
6197 	PMD_FUNC(pmf, "rele_dep")
6198 	pm_pdr_t *dp;
6199 	char *kept_path = NULL;
6200 	dev_info_t *kept = NULL;
6201 	int count = 0;
6202 
6203 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6204 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6205 			continue;
6206 		for (count = 0; count < dp->pdr_kept_count; count++) {
6207 			kept_path = dp->pdr_kept_paths[count];
6208 			if (kept_path == NULL)
6209 				continue;
6210 			kept = pm_name_to_dip(kept_path, 1);
6211 			if (kept) {
6212 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6213 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6214 				    keeper))
6215 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6216 				pm_rele_power(kept);
6217 				ddi_release_devi(kept);
6218 			}
6219 		}
6220 	}
6221 }
6222 
6223 /*
6224  * Called when we are just released from direct PM.  Bring ourselves up
6225  * if our keeper is up, since dependencies are not honored while a kept
6226  * device is under direct PM.
6227  */
6228 static void
6229 pm_bring_self_up(char *keptpath)
6230 {
6231 	PMD_FUNC(pmf, "bring_self_up")
6232 	dev_info_t *kept;
6233 	dev_info_t *keeper;
6234 	pm_pdr_t *dp;
6235 	int i, j;
6236 	int up = 0, circ;
6237 
6238 	kept = pm_name_to_dip(keptpath, 1);
6239 	if (kept == NULL)
6240 		return;
6241 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6242 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6243 		if (dp->pdr_kept_count == 0)
6244 			continue;
6245 		for (i = 0; i < dp->pdr_kept_count; i++) {
6246 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6247 				continue;
6248 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6249 			if (keeper) {
6250 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6251 				    pmf, PM_DEVICE(keeper)))
6252 				PM_LOCK_POWER(keeper, &circ);
6253 				for (j = 0; j < PM_NUMCMPTS(keeper);
6254 				    j++) {
6255 					if (PM_CURPOWER(keeper, j)) {
6256 						PMD(PMD_KEEPS, ("%s: comp="
6257 						    "%d is up\n", pmf, j))
6258 						up++;
6259 					}
6260 				}
6261 				if (up) {
6262 					if (PM_SKBU(kept))
6263 						DEVI(kept)->devi_pm_flags &=
6264 						    ~PMC_SKIP_BRINGUP;
6265 					bring_pmdep_up(kept, 1);
6266 				}
6267 				PM_UNLOCK_POWER(keeper, circ);
6268 				ddi_release_devi(keeper);
6269 			}
6270 		}
6271 	}
6272 	ddi_release_devi(kept);
6273 }
6274 
6275 static void
6276 pm_process_dep_request(pm_dep_wk_t *work)
6277 {
6278 	PMD_FUNC(pmf, "dep_req")
6279 	int ret;
6280 
6281 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6282 	    pdw_type_decode(work->pdw_type)))
6283 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6284 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6285 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6286 
6287 	switch (work->pdw_type) {
6288 	case PM_DEP_WK_POWER_ON:
6289 		/* Bring up the kept devices and put a hold on them */
6290 		bring_wekeeps_up(work->pdw_keeper);
6291 		break;
6292 	case PM_DEP_WK_POWER_OFF:
6293 		/* Release the kept devices */
6294 		pm_rele_dep(work->pdw_keeper);
6295 		break;
6296 	case PM_DEP_WK_DETACH:
6297 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6298 		break;
6299 	case PM_DEP_WK_REMOVE_DEP:
6300 		pm_discard_dependencies();
6301 		break;
6302 	case PM_DEP_WK_BRINGUP_SELF:
6303 		/*
6304 		 * We deferred satisfying our dependency till now, so satisfy
6305 		 * it again and bring ourselves up.
6306 		 */
6307 		pm_bring_self_up(work->pdw_kept);
6308 		break;
6309 	case PM_DEP_WK_RECORD_KEEPER:
6310 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6311 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6312 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6313 		break;
6314 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6315 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6316 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6317 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6318 		break;
6319 	case PM_DEP_WK_KEPT:
6320 		ret = pm_kept(work->pdw_kept);
6321 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6322 		    ret))
6323 		break;
6324 	case PM_DEP_WK_KEEPER:
6325 		ret = pm_keeper(work->pdw_keeper);
6326 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6327 		    pmf, ret))
6328 		break;
6329 	case PM_DEP_WK_ATTACH:
6330 		ret = pm_keeper(work->pdw_keeper);
6331 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6332 		    pmf, ret))
6333 		ret = pm_kept(work->pdw_kept);
6334 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6335 		    pmf, ret))
6336 		break;
6337 	case PM_DEP_WK_CHECK_KEPT:
6338 		ret = pm_is_kept(work->pdw_kept);
6339 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6340 		    pmf, work->pdw_kept, ret))
6341 		break;
6342 	case PM_DEP_WK_CPR_SUSPEND:
6343 		pm_discard_dependencies();
6344 		break;
6345 	case PM_DEP_WK_CPR_RESUME:
6346 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6347 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6348 		break;
6349 	default:
6350 		ASSERT(0);
6351 		break;
6352 	}
6353 	/*
6354 	 * Free the work structure if the requester is not waiting.
6355 	 * Otherwise it is the requester's responsibility to free it.
6356 	 */
6357 	if (!work->pdw_wait) {
6358 		if (work->pdw_keeper)
6359 			kmem_free(work->pdw_keeper,
6360 			    strlen(work->pdw_keeper) + 1);
6361 		if (work->pdw_kept)
6362 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6363 		kmem_free(work, sizeof (pm_dep_wk_t));
6364 	} else {
6365 		/*
6366 		 * Notify requester if it is waiting for it.
6367 		 */
6368 		work->pdw_ret = ret;
6369 		work->pdw_done = 1;
6370 		cv_signal(&work->pdw_cv);
6371 	}
6372 }
6373 
6374 /*
6375  * Process PM dependency requests.
6376  */
6377 static void
6378 pm_dep_thread(void)
6379 {
6380 	pm_dep_wk_t *work;
6381 	callb_cpr_t cprinfo;
6382 
6383 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6384 	    "pm_dep_thread");
6385 	for (;;) {
6386 		mutex_enter(&pm_dep_thread_lock);
6387 		if (pm_dep_thread_workq == NULL) {
6388 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6389 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6390 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6391 		}
6392 		work = pm_dep_thread_workq;
6393 		pm_dep_thread_workq = work->pdw_next;
6394 		if (pm_dep_thread_tail == work)
6395 			pm_dep_thread_tail = work->pdw_next;
6396 		mutex_exit(&pm_dep_thread_lock);
6397 		pm_process_dep_request(work);
6398 
6399 	}
6400 	/*NOTREACHED*/
6401 }
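
/*
 * A minimal sketch of how requests reach the queue drained above (the
 * real dispatcher lives elsewhere in the framework; this uses only the
 * fields consumed by pm_process_dep_request()):
 *
 *	work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
 *	work->pdw_type = PM_DEP_WK_KEPT;
 *	work->pdw_kept = keptpath;	(caller-allocated copy)
 *	mutex_enter(&pm_dep_thread_lock);
 *	if (pm_dep_thread_tail == NULL)
 *		pm_dep_thread_workq = work;
 *	else
 *		pm_dep_thread_tail->pdw_next = work;
 *	pm_dep_thread_tail = work;
 *	cv_signal(&pm_dep_thread_cv);
 *	mutex_exit(&pm_dep_thread_lock);
 */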
6402 
6403 /*
6404  * Set the power level of the indicated device to unknown (if it is not a
6405  * backwards compatible device), as it has just been resumed, and it won't
6406  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6407  */
6408 void
6409 pm_forget_power_level(dev_info_t *dip)
6410 {
6411 	dev_info_t *pdip = ddi_get_parent(dip);
6412 	int i, count = 0;
6413 
6414 	if (!PM_ISBC(dip)) {
6415 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6416 			count += (PM_CURPOWER(dip, i) == 0);
6417 
6418 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6419 			e_pm_hold_rele_power(pdip, count);
6420 
6421 		/*
6422 		 * Count this as a power cycle if we care
6423 		 */
6424 		if (DEVI(dip)->devi_pm_volpmd &&
6425 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6426 			DEVI(dip)->devi_pm_volpmd = 0;
6427 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6428 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6429 	}
6430 }
6431 
6432 /*
6433  * This function advises the caller whether it should make a power-off
6434  * transition at this time or not.  If the transition is not advised
6435  * at this time, the number of seconds until the next power-off
6436  * transition can be made is returned through the "intervalp" pointer.
6437  * This function returns:
6438  *
6439  *  1  power-off advised
6440  *  0  power-off not advised; intervalp will point to seconds from
6441  *	  now that a power-off is advised.  If the device has already
6442  *	  outlived the number of years that policy specifies it should
6443  *	  last, a large number is returned as the time interval.
6444  *  -1  error
6445  */
6446 int
6447 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6448 {
6449 	PMD_FUNC(pmf, "pm_trans_check")
6450 	char dbuf[DC_SCSI_MFR_LEN];
6451 	struct pm_scsi_cycles *scp;
6452 	int service_years, service_weeks, full_years;
6453 	time_t now, service_seconds, tdiff;
6454 	time_t within_year, when_allowed;
6455 	char *ptr;
6456 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6457 	int cycles_diff, cycles_over;
6458 
6459 	if (datap == NULL) {
6460 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6461 		return (-1);
6462 	}
6463 
6464 	if (datap->format == DC_SCSI_FORMAT) {
6465 		/*
6466 		 * Power cycles of the scsi drives are distributed
6467 		 * over 5 years with the following percentage ratio:
6468 		 *
6469 		 *	30%, 25%, 20%, 15%, and 10%
6470 		 *
6471 		 * The power cycle quota for each year is distributed
6472 		 * linearly throughout the year.  The equation for
6473 		 * determining the expected cycles is:
6474 		 *
6475 		 *	e = a * (n / y)
6476 		 *
6477 		 * e = expected cycles
6478 		 * a = allocated cycles for this year
6479 		 * n = number of seconds since beginning of this year
6480 		 * y = number of seconds in a year
6481 		 *
6482 		 * Note that the year begins on the day that the drive
6483 		 * was put in service.
6484 		 *
6485 		 * If the drive has passed its expected cycles, we
6486 		 * can determine when it can start to power cycle
6487 		 * again to keep it on track to meet the 5-year
6488 		 * life expectancy.  The equation for determining
6489 		 * when to power cycle is:
6490 		 *
6491 		 *	w = y * (c / a)
6492 		 *
6493 		 * w = when it can power cycle again
6494 		 * y = number of seconds in a year
6495 		 * c = current number of cycles
6496 		 * a = allocated cycles for the year
6497 		 *
6498 		 */
6499 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6500 
6501 		scp = &datap->un.scsi_cycles;
6502 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6503 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6504 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6505 		if (scp->ncycles < 0 || scp->flag != 0) {
6506 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6507 			return (-1);
6508 		}
6509 
6510 		if (scp->ncycles > scp->lifemax) {
6511 			*intervalp = (LONG_MAX / hz);
6512 			return (0);
6513 		}
6514 
6515 		/*
6516 		 * convert service date to time_t
6517 		 */
6518 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6519 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6520 		ptr = dbuf;
6521 		service_years = stoi(&ptr) - EPOCH_YEAR;
6522 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6523 		    DC_SCSI_WEEK_LEN);
6524 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6525 
6526 		/*
6527 		 * scsi standard does not specify WW data,
6528 		 * could be (00-51) or (01-52)
6529 		 */
6530 		ptr = dbuf;
6531 		service_weeks = stoi(&ptr);
6532 		if (service_years < 0 ||
6533 		    service_weeks < 0 || service_weeks > 52) {
6534 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6535 			    pmf, service_years, service_weeks))
6536 			return (-1);
6537 		}
6538 
6539 		/*
6540 		 * calculate service date in seconds-since-epoch,
6541 		 * adding one day for each leap-year.
6542 		 *
6543 		 * (years-since-epoch + 2) fixes integer truncation,
6544 		 * example: (8) leap-years during [1972, 2000]
6545 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6546 		 */
6547 		service_seconds = (service_years * DC_SPY) +
6548 		    (service_weeks * DC_SPW) +
6549 		    (((service_years + 2) / 4) * DC_SPD);
6550 
6551 		now = gethrestime_sec();
6552 		/*
6553 		 * since the granularity of 'svc_date' is day not second,
6554 		 * 'now' should be rounded up to full day.
6555 		 */
6556 		now = ((now + DC_SPD - 1) / DC_SPD) * DC_SPD;
6557 		if (service_seconds > now) {
6558 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6559 			    "than now (%ld)!\n", pmf, service_seconds, now))
6560 			return (-1);
6561 		}
6562 
6563 		tdiff = now - service_seconds;
6564 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6565 
6566 		/*
6567 		 * NOTE - Leap years are not considered in the calculations
6568 		 * below.
6569 		 */
6570 		full_years = (tdiff / DC_SPY);
6571 		if ((full_years >= DC_SCSI_NPY) &&
6572 		    (scp->ncycles <= scp->lifemax))
6573 			return (1);
6574 
6575 		/*
6576 		 * Determine what is the normal cycle usage for the
6577 		 * device at the beginning and the end of this year.
6578 		 */
6579 		lower_bound_cycles = (!full_years) ? 0 :
6580 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6581 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6582 
6583 		if (scp->ncycles <= lower_bound_cycles)
6584 			return (1);
6585 
6586 		/*
6587 		 * The linear slope that determines how many cycles
6588 		 * are allowed this year is number of seconds
6589 		 * passed this year over total number of seconds in a year.
6590 		 */
6591 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6592 		within_year = (tdiff % DC_SPY);
6593 		cycles_allowed = lower_bound_cycles +
6594 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6595 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6596 		    full_years, within_year))
6597 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6598 		    cycles_allowed))
6599 
6600 		if (scp->ncycles <= cycles_allowed)
6601 			return (1);
6602 
6603 		/*
6604 		 * The transition is not advised now but we can
6605 		 * determine when the next transition can be made.
6606 		 *
6607 		 * Depending on how many cycles the device has been
6608 		 * over-used, we may need to skip years with
6609 		 * different percentage quota in order to determine
6610 		 * when the next transition can be made.
6611 		 */
6612 		cycles_over = (scp->ncycles - lower_bound_cycles);
6613 		while (cycles_over > cycles_diff) {
6614 			full_years++;
6615 			if (full_years >= DC_SCSI_NPY) {
6616 				*intervalp = (LONG_MAX / hz);
6617 				return (0);
6618 			}
6619 			cycles_over -= cycles_diff;
6620 			lower_bound_cycles = upper_bound_cycles;
6621 			upper_bound_cycles =
6622 			    (scp->lifemax * pcnt[full_years]) / 100;
6623 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6624 		}
6625 
6626 		/*
6627 		 * The linear slope that determines when the next transition
6628 		 * can be made is the relative position of used cycles within a
6629 		 * year over total number of cycles within that year.
6630 		 */
6631 		when_allowed = service_seconds + (full_years * DC_SPY) +
6632 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6633 		*intervalp = (when_allowed - now);
6634 		if (*intervalp > (LONG_MAX / hz))
6635 			*intervalp = (LONG_MAX / hz);
6636 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6637 		    *intervalp))
6638 		return (0);
6639 	}
6640 
6641 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6642 	return (-1);
6643 }
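
/*
 * A worked example of the DC_SCSI arithmetic above (numbers are
 * illustrative): with lifemax = 1000 and the cumulative quota table
 * { 30, 55, 75, 90, 100 }, a drive 1.5 years into service has
 * full_years = 1, lower_bound_cycles = 300, upper_bound_cycles = 550,
 * and thus cycles_allowed = 300 + (250 * 1/2) = 425.  At ncycles <= 425
 * a power-off is advised; at ncycles = 500, cycles_over = 200 and the
 * next transition is allowed (DC_SPY * 200) / 250 seconds into the
 * second service year, i.e. about 0.3 years from now.
 */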
6644 
6645 /*
6646  * Nexus drivers call into the pm framework to indicate which child driver
6647  * is about to be installed.  On some platforms, ppm may need to configure
6648  * the hardware for successful installation of a driver.
6649  */
6650 int
6651 pm_init_child(dev_info_t *dip)
6652 {
6653 	power_req_t power_req;
6654 
6655 	ASSERT(ddi_binding_name(dip));
6656 	ASSERT(ddi_get_name_addr(dip));
6657 	pm_ppm_claim(dip);
6658 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6659 		power_req.request_type = PMR_PPM_INIT_CHILD;
6660 		power_req.req.ppm_config_req.who = dip;
6661 		ASSERT(PPM(dip) != NULL);
6662 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6663 		    NULL));
6664 	} else {
6665 #ifdef DEBUG
6666 		/* pass it to the default handler so we can debug things */
6667 		power_req.request_type = PMR_PPM_INIT_CHILD;
6668 		power_req.req.ppm_config_req.who = dip;
6669 		(void) pm_ctlops(NULL, dip,
6670 		    DDI_CTLOPS_POWER, &power_req, NULL);
6671 #endif
6672 	}
6673 	return (DDI_SUCCESS);
6674 }
6675 
6676 /*
6677  * Bring parent of a node that is about to be probed up to full power, and
6678  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6679  * it is time to let it go down again
6680  */
6681 void
6682 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6683 {
6684 	int result;
6685 	power_req_t power_req;
6686 
6687 	bzero(cp, sizeof (*cp));
6688 	cp->ppc_dip = dip;
6689 
6690 	pm_ppm_claim(dip);
6691 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6692 		power_req.request_type = PMR_PPM_PRE_PROBE;
6693 		power_req.req.ppm_config_req.who = dip;
6694 		ASSERT(PPM(dip) != NULL);
6695 		(void) pm_ctlops(PPM(dip), dip,
6696 		    DDI_CTLOPS_POWER, &power_req, &result);
6697 		cp->ppc_ppm = PPM(dip);
6698 	} else {
6699 #ifdef DEBUG
6700 		/* pass it to the default handler so we can debug things */
6701 		power_req.request_type = PMR_PPM_PRE_PROBE;
6702 		power_req.req.ppm_config_req.who = dip;
6703 		(void) pm_ctlops(NULL, dip,
6704 		    DDI_CTLOPS_POWER, &power_req, &result);
6705 #endif
6706 		cp->ppc_ppm = NULL;
6707 	}
6708 }
6709 
6710 int
6711 pm_pre_config(dev_info_t *dip, char *devnm)
6712 {
6713 	PMD_FUNC(pmf, "pre_config")
6714 	int ret;
6715 
6716 	if (MDI_VHCI(dip)) {
6717 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6718 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6719 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6720 	} else if (!PM_GET_PM_INFO(dip))
6721 		return (DDI_SUCCESS);
6722 
6723 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6724 	pm_hold_power(dip);
6725 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6726 	if (ret != DDI_SUCCESS)
6727 		pm_rele_power(dip);
6728 	return (ret);
6729 }
6730 
6731 /*
6732  * This routine is called by devfs during its walk to unconfigure a node.
6733  * If the call is due to auto mod_unloads and the dip is not at its
6734  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6735  * return DDI_SUCCESS.
6736  */
6737 int
6738 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6739 {
6740 	PMD_FUNC(pmf, "pre_unconfig")
6741 	int ret;
6742 
6743 	if (MDI_VHCI(dip)) {
6744 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6745 		    PM_DEVICE(dip), flags))
6746 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6747 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6748 	} else if (!PM_GET_PM_INFO(dip))
6749 		return (DDI_SUCCESS);
6750 
6751 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6752 	    flags))
6753 	*held = 0;
6754 
6755 	/*
6756 	 * If the dip is a leaf node, don't power it up.
6757 	 */
6758 	if (!ddi_get_child(dip))
6759 		return (DDI_SUCCESS);
6760 
6761 	/*
6762 	 * Do not power up the node if the call is due to auto-modunload.
6763 	 */
6764 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6765 		return (DDI_FAILURE);
6766 
6767 	pm_hold_power(dip);
6768 	*held = 1;
6769 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6770 	if (ret != DDI_SUCCESS) {
6771 		pm_rele_power(dip);
6772 		*held = 0;
6773 	}
6774 	return (ret);
6775 }
6776 
6777 /*
6778  * Notify ppm of attach action.  Parent is already held at full power by
6779  * probe action.
6780  */
6781 void
6782 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6783 {
6784 	static char *me = "pm_pre_attach";
6785 	power_req_t power_req;
6786 	int result;
6787 
6788 	/*
6789 	 * Initialize and fill in the PPM cookie
6790 	 */
6791 	bzero(cp, sizeof (*cp));
6792 	cp->ppc_cmd = (int)cmd;
6793 	cp->ppc_ppm = PPM(dip);
6794 	cp->ppc_dip = dip;
6795 
6796 	/*
6797 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6798 	 * Power Management stuff.  DDI_RESUME also has to purge its
6799 	 * power level information.
6800 	 */
6801 	switch (cmd) {
6802 	case DDI_ATTACH:
6803 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6804 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6805 			power_req.req.ppm_config_req.who = dip;
6806 			ASSERT(PPM(dip));
6807 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6808 			    &power_req, &result);
6809 		}
6810 #ifdef DEBUG
6811 		else {
6812 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6813 			power_req.req.ppm_config_req.who = dip;
6814 			(void) pm_ctlops(NULL, dip,
6815 			    DDI_CTLOPS_POWER, &power_req, &result);
6816 		}
6817 #endif
6818 		break;
6819 	case DDI_RESUME:
6820 		pm_forget_power_level(dip);
6821 
6822 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6823 			power_req.request_type = PMR_PPM_PRE_RESUME;
6824 			power_req.req.resume_req.who = cp->ppc_dip;
6825 			power_req.req.resume_req.cmd =
6826 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6827 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6828 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6829 			    DDI_CTLOPS_POWER, &power_req, &result);
6830 		}
6831 #ifdef DEBUG
6832 		else {
6833 			power_req.request_type = PMR_PPM_PRE_RESUME;
6834 			power_req.req.resume_req.who = cp->ppc_dip;
6835 			power_req.req.resume_req.cmd =
6836 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6837 			(void) pm_ctlops(NULL, cp->ppc_dip,
6838 			    DDI_CTLOPS_POWER, &power_req, &result);
6839 		}
6840 #endif
6841 		break;
6842 
6843 	case DDI_PM_RESUME:
6844 		break;
6845 
6846 	default:
6847 		panic(me);
6848 	}
6849 }
6850 
6851 /*
6852  * Nexus drivers call into the pm framework to indicate which child driver
6853  * is being uninstalled.  On some platforms, ppm may need to reconfigure the
6854  * hardware since the device driver is no longer installed.
6855  */
6856 int
6857 pm_uninit_child(dev_info_t *dip)
6858 {
6859 	power_req_t power_req;
6860 
6861 	ASSERT(ddi_binding_name(dip));
6862 	ASSERT(ddi_get_name_addr(dip));
6863 	pm_ppm_claim(dip);
6864 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6865 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6866 		power_req.req.ppm_config_req.who = dip;
6867 		ASSERT(PPM(dip));
6868 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6869 		    NULL));
6870 	} else {
6871 #ifdef DEBUG
6872 		/* pass it to the default handler so we can debug things */
6873 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6874 		power_req.req.ppm_config_req.who = dip;
6875 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6876 #endif
6877 	}
6878 	return (DDI_SUCCESS);
6879 }
6880 /*
6881  * Decrement kidsupcnt so scan can turn the parent back off if it is idle.
6882  * Also notify ppm of the result of the probe if there is a ppm that cares.
6883  */
6884 void
6885 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6886 {
6887 	_NOTE(ARGUNUSED(probe_failed))
6888 	int result;
6889 	power_req_t power_req;
6890 
6891 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6892 		power_req.request_type = PMR_PPM_POST_PROBE;
6893 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6894 		power_req.req.ppm_config_req.result = ret;
6895 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6896 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6897 		    &power_req, &result);
6898 	}
6899 #ifdef DEBUG
6900 	else {
6901 		power_req.request_type = PMR_PPM_POST_PROBE;
6902 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6903 		power_req.req.ppm_config_req.result = ret;
6904 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6905 		    &power_req, &result);
6906 	}
6907 #endif
6908 }
6909 
6910 void
6911 pm_post_config(dev_info_t *dip, char *devnm)
6912 {
6913 	PMD_FUNC(pmf, "post_config")
6914 
6915 	if (MDI_VHCI(dip)) {
6916 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6917 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6918 		return;
6919 	} else if (!PM_GET_PM_INFO(dip))
6920 		return;
6921 
6922 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6923 	pm_rele_power(dip);
6924 }
6925 
6926 void
6927 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6928 {
6929 	PMD_FUNC(pmf, "post_unconfig")
6930 
6931 	if (MDI_VHCI(dip)) {
6932 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6933 		    PM_DEVICE(dip), held))
6934 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6935 		return;
6936 	} else if (!PM_GET_PM_INFO(dip))
6937 		return;
6938 
6939 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6940 	    held))
6941 	if (!held)
6942 		return;
6943 	/*
6944 	 * We have held power in pre_unconfig, release it here.
6945 	 */
6946 	pm_rele_power(dip);
6947 }
6948 
6949 /*
6950  * Notify ppm of result of attach if there is a ppm that cares
6951  */
6952 void
6953 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
6954 {
6955 	int result;
6956 	power_req_t power_req;
6957 	dev_info_t	*dip;
6958 
6959 	if (cp->ppc_cmd != DDI_ATTACH)
6960 		return;
6961 
6962 	dip = cp->ppc_dip;
6963 
6964 	if (ret == DDI_SUCCESS) {
6965 		/*
6966 		 * Attach succeeded, so proceed with post-attach pm tasks
6967 		 */
6968 		if (PM_GET_PM_INFO(dip) == NULL)
6969 			(void) pm_start(dip);
6970 	} else {
6971 		/*
6972 		 * Attach may have got pm started before failing
6973 		 */
6974 		pm_stop(dip);
6975 	}
6976 
6977 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6978 		power_req.request_type = PMR_PPM_POST_ATTACH;
6979 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6980 		power_req.req.ppm_config_req.result = ret;
6981 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6982 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6983 		    DDI_CTLOPS_POWER, &power_req, &result);
6984 	}
6985 #ifdef DEBUG
6986 	else {
6987 		power_req.request_type = PMR_PPM_POST_ATTACH;
6988 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6989 		power_req.req.ppm_config_req.result = ret;
6990 		(void) pm_ctlops(NULL, cp->ppc_dip,
6991 		    DDI_CTLOPS_POWER, &power_req, &result);
6992 	}
6993 #endif
6994 }
6995 
6996 /*
6997  * Notify ppm of detach action.  Parent is already held at full power by
6998  * probe action.
6999  */
7000 void
7001 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7002 {
7003 	int result;
7004 	power_req_t power_req;
7005 
7006 	bzero(cp, sizeof (*cp));
7007 	cp->ppc_dip = dip;
7008 	cp->ppc_cmd = (int)cmd;
7009 
7010 	switch (cmd) {
7011 	case DDI_DETACH:
7012 		pm_detaching(dip);		/* suspend pm while detaching */
7013 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7014 			power_req.request_type = PMR_PPM_PRE_DETACH;
7015 			power_req.req.ppm_config_req.who = dip;
7016 			ASSERT(PPM(dip));
7017 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7018 			    &power_req, &result);
7019 			cp->ppc_ppm = PPM(dip);
7020 		} else {
7021 #ifdef DEBUG
7022 			/* pass to the default handler so we can debug things */
7023 			power_req.request_type = PMR_PPM_PRE_DETACH;
7024 			power_req.req.ppm_config_req.who = dip;
7025 			(void) pm_ctlops(NULL, dip,
7026 			    DDI_CTLOPS_POWER, &power_req, &result);
7027 #endif
7028 			cp->ppc_ppm = NULL;
7029 		}
7030 		break;
7031 
7032 	default:
7033 		break;
7034 	}
7035 }
7036 
7037 /*
7038  * Dip is either a leaf node that exported the "no-involuntary-power-cycles"
7039  * property (if devi_pm_noinvol count is 0), or an ancestor of such a node.
7040  * We need to make an entry to record the details, including certain flags.
7041  */
7042 static void
7043 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7044     int wasvolpmd, major_t major)
7045 {
7046 	PMD_FUNC(pmf, "record_invol_path")
7047 	major_t pm_path_to_major(char *);
7048 	size_t plen;
7049 	pm_noinvol_t *ip, *np, *pp;
7050 	pp = NULL;
7051 
7052 	plen = strlen(path) + 1;
7053 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7054 	np->ni_size = plen;
7055 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7056 	np->ni_noinvolpm = noinvolpm;
7057 	np->ni_volpmd = volpmd;
7058 	np->ni_wasvolpmd = wasvolpmd;
7059 	np->ni_flags = flags;
7060 	(void) strcpy(np->ni_path, path);
7061 	/*
7062 	 * If we haven't actually seen the node attached, it is hard to figure
7063 	 * out its major.  If we could hold the node by path, we would be much
7064 	 * happier here.
7065 	 */
7066 	if (major == (major_t)-1) {
7067 		np->ni_major = pm_path_to_major(path);
7068 	} else {
7069 		np->ni_major = major;
7070 	}
7071 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7072 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7073 		int comp = strcmp(path, ip->ni_path);
7074 		if (comp < 0) {
7075 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7076 			    pmf, path, ip->ni_path))
7077 			/* insert before current entry */
7078 			np->ni_next = ip;
7079 			if (pp) {
7080 				pp->ni_next = np;
7081 			} else {
7082 				pm_noinvol_head = np;
7083 			}
7084 			rw_exit(&pm_noinvol_rwlock);
7085 #ifdef DEBUG
7086 			if (pm_debug & PMD_NOINVOL)
7087 				pr_noinvol("record_invol_path exit0");
7088 #endif
7089 			return;
7090 		} else if (comp == 0) {
7091 			panic("%s already in pm_noinvol list", path);
7092 		}
7093 	}
7094 	/*
7095 	 * If we did not find an entry in the list that this should go before,
7096 	 * then it must go at the end
7097 	 */
7098 	if (pp) {
7099 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7100 		    pp->ni_path))
7101 		ASSERT(pp->ni_next == 0);
7102 		pp->ni_next = np;
7103 	} else {
7104 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7105 		ASSERT(!pm_noinvol_head);
7106 		pm_noinvol_head = np;
7107 	}
7108 	rw_exit(&pm_noinvol_rwlock);
7109 #ifdef DEBUG
7110 	if (pm_debug & PMD_NOINVOL)
7111 		pr_noinvol("record_invol_path exit");
7112 #endif
7113 }
7114 
7115 void
7116 pm_record_invol(dev_info_t *dip)
7117 {
7118 	char *pathbuf;
7119 	int pm_all_components_off(dev_info_t *);
7120 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7121 
7122 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7123 	(void) ddi_pathname(dip, pathbuf);
7124 
7125 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7126 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7127 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7128 
7129 	/*
7130 	 * If this child's detach will be holding up its ancestors, then we
7131 	 * allow for an exception to that if all children of this type have
7132 	 * gone down voluntarily.
7133 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7134 	 */
7135 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7136 	    dip);
7137 	kmem_free(pathbuf, MAXPATHLEN);
7138 }
7139 
7140 void
7141 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7142 {
7143 	dev_info_t *dip = cp->ppc_dip;
7144 	int result;
7145 	power_req_t power_req;
7146 
7147 	switch (cp->ppc_cmd) {
7148 	case DDI_DETACH:
7149 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7150 			power_req.request_type = PMR_PPM_POST_DETACH;
7151 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7152 			power_req.req.ppm_config_req.result = ret;
7153 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7154 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7155 			    DDI_CTLOPS_POWER, &power_req, &result);
7156 		}
7157 #ifdef DEBUG
7158 		else {
7159 			power_req.request_type = PMR_PPM_POST_DETACH;
7160 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7161 			power_req.req.ppm_config_req.result = ret;
7162 			(void) pm_ctlops(NULL, cp->ppc_dip,
7163 			    DDI_CTLOPS_POWER, &power_req, &result);
7164 		}
7165 #endif
7166 		if (ret == DDI_SUCCESS) {
7167 			/*
7168 			 * For hotplug detach we assume it is *really* gone
7169 			 */
7170 			if (cp->ppc_cmd == DDI_DETACH &&
7171 			    ((DEVI(dip)->devi_pm_flags &
7172 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7173 			    DEVI(dip)->devi_pm_noinvolpm))
7174 				pm_record_invol(dip);
7175 			DEVI(dip)->devi_pm_flags &=
7176 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7177 
7178 			/*
7179 			 * If console fb is detaching, then we don't need to
7180 			 * worry any more about it going off (pm_detaching has
7181 			 * brought up all components)
7182 			 */
7183 			if (PM_IS_CFB(dip)) {
7184 				mutex_enter(&pm_cfb_lock);
7185 				ASSERT(cfb_dip_detaching);
7186 				ASSERT(cfb_dip == NULL);
7187 				ASSERT(pm_cfb_comps_off == 0);
7188 				cfb_dip_detaching = NULL;
7189 				mutex_exit(&pm_cfb_lock);
7190 			}
7191 			pm_stop(dip);	/* make it permanent */
7192 		} else {
7193 			if (PM_IS_CFB(dip)) {
7194 				mutex_enter(&pm_cfb_lock);
7195 				ASSERT(cfb_dip_detaching);
7196 				ASSERT(cfb_dip == NULL);
7197 				ASSERT(pm_cfb_comps_off == 0);
7198 				cfb_dip = cfb_dip_detaching;
7199 				cfb_dip_detaching = NULL;
7200 				mutex_exit(&pm_cfb_lock);
7201 			}
7202 			pm_detach_failed(dip);	/* resume power management */
7203 		}
7204 		break;
7205 	case DDI_PM_SUSPEND:
7206 		break;
7207 	case DDI_SUSPEND:
7208 		break;				/* legal, but nothing to do */
7209 	default:
7210 #ifdef DEBUG
7211 		panic("pm_post_detach: unrecognized cmd %d for detach",
7212 		    cp->ppc_cmd);
7213 		/*NOTREACHED*/
7214 #else
7215 		break;
7216 #endif
7217 	}
7218 }
7219 
7220 /*
7221  * Called after vfs_mountroot has got the clock started to fix up timestamps
7222  * that were set when root bus drivers attached.  hrestime was 0 then, so the
7223  * devices look busy but have a busycnt of 0
7224  */
7225 int
7226 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7227 {
7228 	_NOTE(ARGUNUSED(arg))
7229 
7230 	pm_info_t *info = PM_GET_PM_INFO(dip);
7231 	struct pm_component *cp;
7232 	int i;
7233 
7234 	if (!info)
7235 		return (DDI_WALK_CONTINUE);
7236 	PM_LOCK_BUSY(dip);
7237 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7238 		cp = PM_CP(dip, i);
7239 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7240 			cp->pmc_timestamp = gethrestime_sec();
7241 	}
7242 	PM_UNLOCK_BUSY(dip);
7243 	return (DDI_WALK_CONTINUE);
7244 }
7245 
7246 /*
7247  * Called at attach time to see if the device being attached has a record in
7248  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7249  * parents and set a flag in the dip
7250  */
7251 void
7252 pm_noinvol_specd(dev_info_t *dip)
7253 {
7254 	PMD_FUNC(pmf, "noinvol_specd")
7255 	char *pathbuf;
7256 	pm_noinvol_t *ip, *pp = NULL;
7257 	int wasvolpmd;
7258 	int found = 0;
7259 
7260 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7261 		return;
7262 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7263 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7264 	(void) ddi_pathname(dip, pathbuf);
7265 
7266 	PM_LOCK_DIP(dip);
7267 	DEVI(dip)->devi_pm_volpmd = 0;
7268 	DEVI(dip)->devi_pm_noinvolpm = 0;
7269 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7270 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7271 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7272 		    pmf, pathbuf, ip->ni_path))
7273 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7274 			found++;
7275 			break;
7276 		}
7277 	}
7278 	rw_exit(&pm_noinvol_rwlock);
7279 	if (!found) {
7280 		PM_UNLOCK_DIP(dip);
7281 		kmem_free(pathbuf, MAXPATHLEN);
7282 		return;
7283 	}
7284 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7285 	pp = NULL;
7286 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7287 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7288 		    pmf, pathbuf, ip->ni_path))
7289 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7290 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7291 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7292 			/*
7293 			 * Handle special case of console fb
7294 			 */
7295 			if (PM_IS_CFB(dip)) {
7296 				mutex_enter(&pm_cfb_lock);
7297 				cfb_dip = dip;
7298 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7299 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7300 				mutex_exit(&pm_cfb_lock);
7301 			}
7302 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7303 			ASSERT((DEVI(dip)->devi_pm_flags &
7304 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7305 			    DEVI(dip)->devi_pm_noinvolpm);
7306 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7307 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7308 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7309 			    ip->ni_noinvolpm, ip->ni_volpmd,
7310 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7311 			/*
7312 			 * free the entry in hopes the list will now be empty
7313 			 * and we won't have to search it any more until the
7314 			 * device detaches
7315 			 */
7316 			if (pp) {
7317 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7318 				    pmf, ip->ni_path, pp->ni_path))
7319 				pp->ni_next = ip->ni_next;
7320 			} else {
7321 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7322 				    pmf, ip->ni_path))
7323 				ASSERT(pm_noinvol_head == ip);
7324 				pm_noinvol_head = ip->ni_next;
7325 			}
7326 			PM_UNLOCK_DIP(dip);
7327 			wasvolpmd = ip->ni_wasvolpmd;
7328 			rw_exit(&pm_noinvol_rwlock);
7329 			kmem_free(ip->ni_path, ip->ni_size);
7330 			kmem_free(ip, sizeof (*ip));
7331 			/*
7332 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7333 			 * (and volpmd if appropriate)
7334 			 */
7335 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7336 			    wasvolpmd, pathbuf, dip);
7337 #ifdef DEBUG
7338 			if (pm_debug & PMD_NOINVOL)
7339 				pr_noinvol("noinvol_specd exit");
7340 #endif
7341 			kmem_free(pathbuf, MAXPATHLEN);
7342 			return;
7343 		}
7344 	}
7345 	kmem_free(pathbuf, MAXPATHLEN);
7346 	rw_exit(&pm_noinvol_rwlock);
7347 	PM_UNLOCK_DIP(dip);
7348 }
7349 
7350 int
7351 pm_all_components_off(dev_info_t *dip)
7352 {
7353 	int i;
7354 	pm_component_t *cp;
7355 
7356 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7357 		cp = PM_CP(dip, i);
7358 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7359 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7360 			return (0);
7361 	}
7362 	return (1);	/* all off */
7363 }
7364 
7365 /*
7366  * Make sure that all "no involuntary power cycles" devices are attached.
7367  * Called before doing a cpr suspend to make sure the driver has a say about
7368  * the power cycle
7369  */
7370 int
7371 pm_reattach_noinvol(void)
7372 {
7373 	PMD_FUNC(pmf, "reattach_noinvol")
7374 	pm_noinvol_t *ip;
7375 	char *path;
7376 	dev_info_t *dip;
7377 
7378 	/*
7379 	 * Prevent the modunload thread from unloading any modules until we
7380 	 * have completely stopped all kernel threads.
7381 	 */
7382 	modunload_disable();
7383 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7384 		/*
7385 		 * Forget we've ever seen any entry
7386 		 */
7387 		ip->ni_persistent = 0;
7388 	}
7389 restart:
7390 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7391 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7392 		major_t maj;
7393 		maj = ip->ni_major;
7394 		path = ip->ni_path;
7395 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7396 			if (ip->ni_persistent) {
7397 				/*
7398 				 * If we weren't able to make this entry
7399 				 * go away, then we give up, as
7400 				 * holding/attaching the driver ought to have
7401 				 * resulted in this entry being deleted
7402 				 */
7403 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7404 				    "(%s|%d)\n", pmf, ip->ni_path,
7405 				    ddi_major_to_name(maj), (int)maj))
7406 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7407 				    ip->ni_path);
7408 				modunload_enable();
7409 				rw_exit(&pm_noinvol_rwlock);
7410 				return (0);
7411 			}
7412 			ip->ni_persistent++;
7413 			rw_exit(&pm_noinvol_rwlock);
7414 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7415 			dip = e_ddi_hold_devi_by_path(path, 0);
7416 			if (dip == NULL) {
7417 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7418 				    pmf, path, (int)maj))
7419 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7420 				    "driver", path);
7421 				modunload_enable();
7422 				return (0);
7423 			} else {
7424 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7425 				/*
7426 				 * Since the modunload thread is stopped, we
7427 				 * don't have to keep the driver held, which
7428 				 * saves a ton of bookkeeping
7429 				 */
7430 				ddi_release_devi(dip);
7431 				goto restart;
7432 			}
7433 		} else {
7434 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7435 			    pmf, ip->ni_path))
7436 			continue;
7437 		}
7438 	}
7439 	rw_exit(&pm_noinvol_rwlock);
7440 	return (1);
7441 }
7442 
7443 void
7444 pm_reattach_noinvol_fini(void)
7445 {
7446 	modunload_enable();
7447 }
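/*
 * Sketch of the expected cpr-side pairing (hypothetical caller; the actual
 * calls live in the cpr code, not in this file):
 *
 *	if (!pm_reattach_noinvol())
 *		fail the suspend		-- modunload already re-enabled
 *	... perform the cpr suspend/resume ...
 *	pm_reattach_noinvol_fini();		-- re-enables modunload
 */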
7448 
7449 /*
7450  * Display (console frame buffer) pm support code
7451  */
7452 
7453 
7454 /*
7455  * console frame-buffer power-mgmt gets enabled when debugging
7456  * services are not present or console fbpm override is set
7457  */
7458 void
7459 pm_cfb_setup(const char *stdout_path)
7460 {
7461 	PMD_FUNC(pmf, "cfb_setup")
7462 	extern int obpdebug;
7463 	char *devname;
7464 	dev_info_t *dip;
7465 	int devname_len;
7466 	extern dev_info_t *fbdip;
7467 
7468 	/*
7469 	 * By virtue of this function being called (from consconfig),
7470 	 * we know stdout is a framebuffer.
7471 	 */
7472 	stdout_is_framebuffer = 1;
7473 
7474 	if (obpdebug || (boothowto & RB_DEBUG)) {
7475 		if (pm_cfb_override == 0) {
7476 			/*
7477 			 * Console is frame buffer, but we want to suppress
7478 			 * pm on it because of debugging setup
7479 			 */
7480 			pm_cfb_enabled = 0;
7481 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7482 			    "console power management.");
7483 			/*
7484 			 * however, we still need to know which is the console
7485 			 * fb in order to suppress pm on it
7486 			 */
7487 		} else {
7488 			cmn_err(CE_WARN, "Kernel debugger present: see "
7489 			    "kmdb(1M) for interaction with power management.");
7490 		}
7491 	}
7492 #ifdef DEBUG
7493 	/*
7494 	 * If console is fb and is power managed, don't do prom_printfs from
7495 	 * pm debug macro
7496 	 */
7497 	if (pm_cfb_enabled) {
7498 		if (pm_debug)
7499 			prom_printf("pm debug output will be to log only\n");
7500 		pm_divertdebug++;
7501 	}
7502 #endif
7503 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7504 	devname_len = strlen(devname) + 1;
7505 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7506 	/* if the driver is attached */
7507 	if ((dip = fbdip) != NULL) {
7508 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7509 		    PM_DEVICE(dip)))
7510 		/*
7511 		 * We set up here as if the driver were power manageable in case
7512 		 * we get a later attach of a pm'able driver (which would result
7513 		 * in a panic later)
7514 		 */
7515 		cfb_dip = dip;
7516 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7517 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7518 		    PM_DEVICE(dip)))
7519 #ifdef DEBUG
7520 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7521 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7522 			    pmf, PM_DEVICE(dip)))
7523 		}
7524 #endif
7525 	} else {
7526 		char *ep;
7527 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7528 		pm_record_invol_path(devname,
7529 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7530 		    (major_t)-1);
7531 		for (ep = strrchr(devname, '/'); ep != devname;
7532 		    ep = strrchr(devname, '/')) {
7533 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7534 			*ep = '\0';
7535 			dip = pm_name_to_dip(devname, 0);
7536 			if (dip != NULL) {
7537 				/*
7538 				 * Walk up the tree incrementing
7539 				 * devi_pm_noinvolpm
7540 				 */
7541 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7542 				    0, 0, devname, dip);
7543 				break;
7544 			} else {
7545 				pm_record_invol_path(devname,
7546 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7547 			}
7548 		}
7549 	}
7550 	kmem_free(devname, devname_len);
7551 }
7552 
7553 void
7554 pm_cfb_rele(void)
7555 {
7556 	mutex_enter(&pm_cfb_lock);
7557 	/*
7558 	 * this caller isn't using the console any more; it is ok to take it
7559 	 * down if the count goes to 0
7560 	 */
7561 	cfb_inuse--;
7562 	mutex_exit(&pm_cfb_lock);
7563 }
7564 
7565 /*
7566  * software interrupt handler for fbpm; this function exists because we can't
7567  * bring up the frame buffer power from above lock level.  So if we need to,
7568  * we instead schedule a softint that runs this routine and takes us into
7569  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7570  */
7571 static uint_t
7572 pm_cfb_softint(caddr_t int_handler_arg)
7573 {
7574 	_NOTE(ARGUNUSED(int_handler_arg))
7575 	int rval = DDI_INTR_UNCLAIMED;
7576 
7577 	mutex_enter(&pm_cfb_lock);
7578 	if (pm_soft_pending) {
7579 		mutex_exit(&pm_cfb_lock);
7580 		debug_enter((char *)NULL);
7581 		/* acquired in debug_enter before calling pm_cfb_trigger */
7582 		pm_cfb_rele();
7583 		mutex_enter(&pm_cfb_lock);
7584 		pm_soft_pending = 0;
7585 		mutex_exit(&pm_cfb_lock);
7586 		rval = DDI_INTR_CLAIMED;
7587 	} else
7588 		mutex_exit(&pm_cfb_lock);
7589 
7590 	return (rval);
7591 }
7592 
7593 void
7594 pm_cfb_setup_intr(void)
7595 {
7596 	PMD_FUNC(pmf, "cfb_setup_intr")
7597 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7598 	void pm_cfb_check_and_powerup(void);
7599 
7600 	if (!stdout_is_framebuffer) {
7601 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7602 		return;
7603 	}
7604 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7605 #ifdef DEBUG
7606 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7607 #endif
7608 	/*
7609 	 * setup software interrupt handler
7610 	 */
7611 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7612 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7613 		panic("pm: unable to register soft intr.");
7614 
7615 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7616 }
7617 
7618 /*
7619  * Checks to see if it is safe to write to the console wrt power management
7620  * (i.e. if the console is a framebuffer, then it must be at full power).
7621  * Returns 1 when power is off (power-up is needed)
7622  * returns 0 when power is on (power-up not needed)
7623  */
7624 int
7625 pm_cfb_check_and_hold(void)
7626 {
7627 	/*
7628 	 * cfb_dip is set iff console is a power manageable frame buffer
7629 	 * device
7630 	 */
7631 	extern int modrootloaded;
7632 
7633 	mutex_enter(&pm_cfb_lock);
7634 	cfb_inuse++;
7635 	ASSERT(cfb_inuse);	/* wrap? */
7636 	if (modrootloaded && cfb_dip) {
7637 		/*
7638 		 * don't power down the frame buffer, the prom is using it
7639 		 */
7640 		if (pm_cfb_comps_off) {
7641 			mutex_exit(&pm_cfb_lock);
7642 			return (1);
7643 		}
7644 	}
7645 	mutex_exit(&pm_cfb_lock);
7646 	return (0);
7647 }
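/*
 * Typical hold/release pattern (illustrative sketch; the prom output path
 * is wired to routines like these in pm_cfb_setup_intr() above):
 *
 *	if (pm_cfb_check_and_hold())		-- hold taken; fb is off
 *		pm_cfb_powerup();
 *	... write to the console ...
 *	pm_cfb_rele();				-- drop the hold taken above
 */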
7648 
7649 /*
7650  * turn on cfb power (which is known to be off).
7651  * Must be called below lock level!
7652  */
7653 void
7654 pm_cfb_powerup(void)
7655 {
7656 	pm_info_t *info;
7657 	int norm;
7658 	int ccount, ci;
7659 	int unused;
7660 #ifdef DEBUG
7661 	/*
7662 	 * Can't reenter prom_prekern, so suppress pm debug messages
7663 	 * (still go to circular buffer).
7664 	 */
7665 	mutex_enter(&pm_debug_lock);
7666 	pm_divertdebug++;
7667 	mutex_exit(&pm_debug_lock);
7668 #endif
7669 	info = PM_GET_PM_INFO(cfb_dip);
7670 	ASSERT(info);
7671 
7672 	ccount = PM_NUMCMPTS(cfb_dip);
7673 	for (ci = 0; ci < ccount; ci++) {
7674 		norm = pm_get_normal_power(cfb_dip, ci);
7675 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7676 		    PM_CANBLOCK_BYPASS, 0, &unused);
7677 	}
7678 #ifdef DEBUG
7679 	mutex_enter(&pm_debug_lock);
7680 	pm_divertdebug--;
7681 	mutex_exit(&pm_debug_lock);
7682 #endif
7683 }
7684 
7685 /*
7686  * Check if the console framebuffer is powered up.  If not power it up.
7687  * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
7688  * must be released by calling pm_cfb_rele when the console fb operation
7689  * is completed.
7690  */
7691 void
7692 pm_cfb_check_and_powerup(void)
7693 {
7694 	if (pm_cfb_check_and_hold())
7695 		pm_cfb_powerup();
7696 }
7697 
7698 /*
7699  * Trigger a low level interrupt to power up console frame buffer.
7700  */
7701 void
7702 pm_cfb_trigger(void)
7703 {
7704 	if (cfb_dip == NULL)
7705 		return;
7706 
7707 	mutex_enter(&pm_cfb_lock);
7708 	/*
7709 	 * If the machine appears to be hung, pulling the keyboard connector off
7710 	 * the console will cause a high level interrupt and go to debug_enter.
7711 	 * But, if the fb is powered down, this routine will be called to bring
7712 	 * it up (by generating a softint to do the work).  If soft interrupts
7713 	 * are not running, and the keyboard connector is pulled again, the
7714 	 * following code detects this condition and calls panic which allows
7715 	 * the fb to be brought up from high level.
7716 	 *
7717 	 * If two nearly simultaneous calls to debug_enter occur (both from
7718 	 * high level) the code described above will cause a panic.
7719 	 */
7720 	if (lbolt <= pm_soft_pending) {
7721 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7722 		panic(panicstr);	/* does a power up at any intr level */
7723 		/* NOTREACHED */
7724 	}
7725 	pm_soft_pending = lbolt;
7726 	mutex_exit(&pm_cfb_lock);
7727 	ddi_trigger_softintr(pm_soft_id);
7728 }
7729 
7730 major_t
7731 pm_path_to_major(char *path)
7732 {
7733 	PMD_FUNC(pmf, "path_to_major")
7734 	char *np, *ap, *bp;
7735 	major_t ret;
7736 	size_t len;
7737 	static major_t i_path_to_major(char *, char *);
7738 
7739 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7740 
7741 	np = strrchr(path, '/');
7742 	if (np != NULL)
7743 		np++;
7744 	else
7745 		np = path;
7746 	len = strlen(np) + 1;
7747 	bp = kmem_alloc(len, KM_SLEEP);
7748 	(void) strcpy(bp, np);
7749 	if ((ap = strchr(bp, '@')) != NULL) {
7750 		*ap = '\0';
7751 	}
7752 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7753 	ret = i_path_to_major(path, np);
7754 	kmem_free(bp, len);
7755 	return (ret);
7756 }
7757 
7758 #ifdef DEBUG
7759 
7760 char *pm_msgp;
7761 char *pm_bufend;
7762 char *pm_msgbuf = NULL;
7763 int   pm_logpages = 2;
7764 
7765 #define	PMLOGPGS	pm_logpages
7766 
7767 /*PRINTFLIKE1*/
7768 void
7769 pm_log(const char *fmt, ...)
7770 {
7771 	va_list adx;
7772 	size_t size;
7773 
7774 	mutex_enter(&pm_debug_lock);
7775 	if (pm_msgbuf == NULL) {
7776 		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
7777 		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
7778 		pm_msgp = pm_msgbuf;
7779 	}
7780 	va_start(adx, fmt);
7781 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
7782 	va_end(adx);
7783 	va_start(adx, fmt);
7784 	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
7785 		bzero(pm_msgp, pm_bufend - pm_msgp);
7786 		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
7787 		if (!pm_divertdebug)
7788 			prom_printf("%s", pm_msgp);
7789 		pm_msgp = pm_msgbuf + size;
7790 	} else {
7791 		(void) vsnprintf(pm_msgp, size, fmt, adx);
7792 		if (!pm_divertdebug)
7793 			prom_printf("%s", pm_msgp);
7794 		pm_msgp += size;
7795 	}
7796 	va_end(adx);
7797 	mutex_exit(&pm_debug_lock);
7798 }
7799 #endif	/* DEBUG */
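/*
 * Illustrative call (hypothetical format string and values): something like
 * pm_log("%s: comp %d level %d -> %d\n", pmf, 0, 0, 1) appends to the
 * circular buffer above and also echoes through prom_printf() unless
 * pm_divertdebug is set.
 */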
7800 
7801 /*
7802  * We want to save the state of any directly pm'd devices over the suspend/
7803  * resume process so that we can put them back the way the controlling
7804  * process left them.
7805  */
7806 void
7807 pm_save_direct_levels(void)
7808 {
7809 	pm_processes_stopped = 1;
7810 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7811 }
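/*
 * Sketch of the expected cpr pairing (hypothetical caller; the real calls
 * live in the cpr code):
 *
 *	... stop user processes ...
 *	pm_save_direct_levels();
 *	... perform the suspend/resume ...
 *	pm_restore_direct_levels();		-- see below
 *	... restart user processes ...
 */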
7812 
7813 static int
7814 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7815 {
7816 	_NOTE(ARGUNUSED(arg))
7817 	int i;
7818 	int *ip;
7819 	pm_info_t *info = PM_GET_PM_INFO(dip);
7820 
7821 	if (!info)
7822 		return (DDI_WALK_CONTINUE);
7823 
7824 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7825 		if (PM_NUMCMPTS(dip) > 2) {
7826 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7827 			    sizeof (int), KM_SLEEP);
7828 			ip = info->pmi_lp;
7829 		} else {
7830 			ip = info->pmi_levels;
7831 		}
7832 		/* autopm and processes are stopped, ok not to lock power */
7833 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7834 			*ip++ = PM_CURPOWER(dip, i);
7835 		/*
7836 		 * There is a small window between stopping the
7837 		 * processes and setting pm_processes_stopped where
7838 		 * a driver could get hung up in a pm_raise_power()
7839 		 * call.  Free any such driver now.
7840 		 */
7841 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7842 	}
7843 
7844 	return (DDI_WALK_CONTINUE);
7845 }
7846 
7847 void
7848 pm_restore_direct_levels(void)
7849 {
7850 	/*
7851 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7852 	 * threads failed) then we don't want to try to restore them
7853 	 */
7854 	if (!pm_processes_stopped)
7855 		return;
7856 
7857 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7858 	pm_processes_stopped = 0;
7859 }
7860 
7861 static int
7862 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7863 {
7864 	_NOTE(ARGUNUSED(arg))
7865 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7866 	int i, nc, result;
7867 	int *ip;
7868 
7869 	pm_info_t *info = PM_GET_PM_INFO(dip);
7870 	if (!info)
7871 		return (DDI_WALK_CONTINUE);
7872 
7873 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7874 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7875 			ip = &info->pmi_lp[nc - 1];
7876 		} else {
7877 			ip = &info->pmi_levels[nc - 1];
7878 		}
7879 		/*
7880 		 * Because fb drivers fail attempts to turn off the
7881 		 * fb when the monitor is on, but treat a request to
7882 		 * turn on the monitor as a request to turn on the
7883 		 * fb too, we process components in descending order.
7884 		 * Because autopm is disabled and processes aren't
7885 		 * running, it is ok to examine current power outside
7886 		 * of the power lock
7887 		 */
7888 		for (i = nc - 1; i >= 0; i--, ip--) {
7889 			if (PM_CURPOWER(dip, i) == *ip)
7890 				continue;
7891 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7892 			    PM_CANBLOCK_BYPASS, 0, &result) !=
7893 			    DDI_SUCCESS) {
7894 				cmn_err(CE_WARN, "cpr: unable "
7895 				    "to restore power level of "
7896 				    "component %d of directly "
7897 				    "power managed device %s@%s"
7898 				    " to %d",
7899 				    i, PM_NAME(dip),
7900 				    PM_ADDR(dip), *ip);
7901 				PMD(PMD_FAIL, ("%s: failed to restore "
7902 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7903 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7904 				    PM_CURPOWER(dip, i), *ip, result))
7905 			}
7906 		}
7907 		if (nc > 2) {
7908 			kmem_free(info->pmi_lp, nc * sizeof (int));
7909 			info->pmi_lp = NULL;
7910 		}
7911 	}
7912 	return (DDI_WALK_CONTINUE);
7913 }
7914 
7915 /*
7916  * Stolen from the bootdev module:
7917  * attempt to convert a path to a major number
7918  */
7919 static major_t
7920 i_path_to_major(char *path, char *leaf_name)
7921 {
7922 	extern major_t path_to_major(char *pathname);
7923 	major_t maj;
7924 
7925 	if ((maj = path_to_major(path)) == (major_t)-1) {
7926 		maj = ddi_name_to_major(leaf_name);
7927 	}
7928 
7929 	return (maj);
7930 }
7931 
7932 /*
7933  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state.
7934  * An entry in the list means that the device is detached, so we need to
7935  * adjust its ancestors as if they had just seen this attach, and any detached
7936  * ancestors need to have their list entries adjusted.
7937  */
7938 void
7939 pm_driver_removed(major_t major)
7940 {
7941 	static void i_pm_driver_removed(major_t major);
7942 
7943 	/*
7944 	 * Serialize removal of drivers. This is to keep ancestors of
7945 	 * a node that is being deleted from getting deleted and added back
7946 	 * with different counters.
7947 	 */
7948 	mutex_enter(&pm_remdrv_lock);
7949 	i_pm_driver_removed(major);
7950 	mutex_exit(&pm_remdrv_lock);
7951 }
7952 
7953 /*
7954  * This routine is called recursively by pm_noinvol_process_ancestors()
7955  */
7956 static void
7957 i_pm_driver_removed(major_t major)
7958 {
7959 	PMD_FUNC(pmf, "driver_removed")
7960 	static void adjust_ancestors(char *, int);
7961 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
7962 	static void pm_noinvol_process_ancestors(char *);
7963 	pm_noinvol_t *ip, *pp = NULL;
7964 	int wasvolpmd;
7965 	ASSERT(major != (major_t)-1);
7966 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
7967 again:
7968 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7969 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7970 		if (major != ip->ni_major)
7971 			continue;
7972 		/*
7973 		 * If it is an ancestor of no-invol node, which is
7974 		 * not removed, skip it. This is to cover the case of
7975 		 * ancestor removed without removing its descendants.
7976 		 */
7977 		if (pm_is_noinvol_ancestor(ip)) {
7978 			ip->ni_flags |= PMC_DRIVER_REMOVED;
7979 			continue;
7980 		}
7981 		wasvolpmd = ip->ni_wasvolpmd;
7982 		/*
7983 		 * remove the entry from the list
7984 		 */
7985 		if (pp) {
7986 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
7987 			    pmf, ip->ni_path, pp->ni_path))
7988 			pp->ni_next = ip->ni_next;
7989 		} else {
7990 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
7991 			    ip->ni_path))
7992 			ASSERT(pm_noinvol_head == ip);
7993 			pm_noinvol_head = ip->ni_next;
7994 		}
7995 		rw_exit(&pm_noinvol_rwlock);
7996 		adjust_ancestors(ip->ni_path, wasvolpmd);
7997 		/*
7998 		 * Had an ancestor been removed before this node, it would have
7999 		 * been skipped. Adjust the no-invol counters for such skipped
8000 		 * ancestors.
8001 		 */
8002 		pm_noinvol_process_ancestors(ip->ni_path);
8003 		kmem_free(ip->ni_path, ip->ni_size);
8004 		kmem_free(ip, sizeof (*ip));
8005 		goto again;
8006 	}
8007 	rw_exit(&pm_noinvol_rwlock);
8008 }
8009 
8010 /*
8011  * returns 1 if *aip is an ancestor of a no-invol node,
8012  *	   0 otherwise
8013  */
8014 static int
8015 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8016 {
8017 	pm_noinvol_t *ip;
8018 
8019 	ASSERT(strlen(aip->ni_path) != 0);
8020 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8021 		if (ip == aip)
8022 			continue;
8023 		/*
8024 		 * To be an ancestor, the path must be an initial substring of
8025 		 * the descendent, and end just before a '/' in the
8026 		 * descendent's path.
8027 		 */
8028 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8029 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8030 			return (1);
8031 	}
8032 	return (0);
8033 }
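/*
 * For example (hypothetical paths): "/pci@1f,0/scsi@2" is an ancestor of
 * "/pci@1f,0/scsi@2/sd@0,0", being an initial substring that ends just
 * before a '/'; it is not an ancestor of "/pci@1f,0/scsi@22/sd@0,0", even
 * though it is an initial substring of that path as well.
 */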
8034 
8035 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8036 /*
8037  * scan through the pm_noinvol list adjusting ancestors of the current
8038  * node; modifies string *path.
8039  */
8040 static void
8041 adjust_ancestors(char *path, int wasvolpmd)
8042 {
8043 	PMD_FUNC(pmf, "adjust_ancestors")
8044 	char *cp;
8045 	pm_noinvol_t *lp;
8046 	pm_noinvol_t *pp = NULL;
8047 	major_t locked = (major_t)UINT_MAX;
8048 	dev_info_t *dip;
8049 	char	*pathbuf;
8050 	size_t pathbuflen = strlen(path) + 1;
8051 
8052 	/*
8053 	 * First we look up the ancestor's dip.  If we find it, then we
8054 	 * adjust counts up the tree
8055 	 */
8056 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8057 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8058 	(void) strcpy(pathbuf, path);
8059 	cp = strrchr(pathbuf, '/');
8060 	if (cp == NULL)	{
8061 		/* if no ancestors, then nothing to do */
8062 		kmem_free(pathbuf, pathbuflen);
8063 		return;
8064 	}
8065 	*cp = '\0';
8066 	dip = pm_name_to_dip(pathbuf, 1);
8067 	if (dip != NULL) {
8068 		locked = PM_MAJOR(dip);
8069 
8070 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8071 		    path, dip);
8072 
8073 		if (locked != (major_t)UINT_MAX)
8074 			ddi_release_devi(dip);
8075 	} else {
8076 		char *apath;
8077 		size_t len = strlen(pathbuf) + 1;
8078 		int  lock_held = 1;
8079 
8080 		/*
8081 		 * Now check for ancestors that exist only in the list
8082 		 */
8083 		apath = kmem_alloc(len, KM_SLEEP);
8084 		(void) strcpy(apath, pathbuf);
8085 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8086 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8087 			/*
8088 			 * This can only happen once.  Since we have to drop
8089 			 * the lock, we need to extract the relevant info.
8090 			 */
8091 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8092 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8093 				    lp->ni_path, lp->ni_noinvolpm,
8094 				    lp->ni_noinvolpm - 1))
8095 				lp->ni_noinvolpm--;
8096 				if (wasvolpmd && lp->ni_volpmd) {
8097 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8098 					    "%d\n", pmf, lp->ni_path,
8099 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8100 					lp->ni_volpmd--;
8101 				}
8102 				/*
8103 				 * remove the entry from the list, if there
8104 				 * are no more no-invol descendants and node
8105 				 * itself is not a no-invol node.
8106 				 */
8107 				if (!(lp->ni_noinvolpm ||
8108 				    (lp->ni_flags & PMC_NO_INVOL))) {
8109 					ASSERT(lp->ni_volpmd == 0);
8110 					if (pp) {
8111 						PMD(PMD_NOINVOL, ("%s: freeing "
8112 						    "%s, prev is %s\n", pmf,
8113 						    lp->ni_path, pp->ni_path))
8114 						pp->ni_next = lp->ni_next;
8115 					} else {
8116 						PMD(PMD_NOINVOL, ("%s: free %s "
8117 						    "head\n", pmf, lp->ni_path))
8118 						ASSERT(pm_noinvol_head == lp);
8119 						pm_noinvol_head = lp->ni_next;
8120 					}
8121 					lock_held = 0;
8122 					rw_exit(&pm_noinvol_rwlock);
8123 					adjust_ancestors(apath, wasvolpmd);
8124 					/* restore apath */
8125 					(void) strcpy(apath, pathbuf);
8126 					kmem_free(lp->ni_path, lp->ni_size);
8127 					kmem_free(lp, sizeof (*lp));
8128 				}
8129 				break;
8130 			}
8131 		}
8132 		if (lock_held)
8133 			rw_exit(&pm_noinvol_rwlock);
8134 		adjust_ancestors(apath, wasvolpmd);
8135 		kmem_free(apath, len);
8136 	}
8137 	kmem_free(pathbuf, pathbuflen);
8138 }
8139 
8140 /*
8141  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8142  * which were skipped even though their drivers were removed.
8143  */
8144 static void
8145 pm_noinvol_process_ancestors(char *path)
8146 {
8147 	pm_noinvol_t *lp;
8148 
8149 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8150 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8151 		if (strstr(path, lp->ni_path) &&
8152 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8153 			rw_exit(&pm_noinvol_rwlock);
8154 			i_pm_driver_removed(lp->ni_major);
8155 			return;
8156 		}
8157 	}
8158 	rw_exit(&pm_noinvol_rwlock);
8159 }
8160 
8161 /*
8162  * Returns true if (detached) device needs to be kept up because it exported the
8163  * "no-involuntary-power-cycles" property or we're pretending it did (console
8164  * fb case) or it is an ancestor of such a device and has used up the "one
8165  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8166  * upon detach.  In any event, we need an exact hit on the path or we return
8167  * false.
8168  */
8169 int
8170 pm_noinvol_detached(char *path)
8171 {
8172 	PMD_FUNC(pmf, "noinvol_detached")
8173 	pm_noinvol_t *ip;
8174 	int ret = 0;
8175 
8176 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8177 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8178 		if (strcmp(path, ip->ni_path) == 0) {
8179 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8180 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8181 				    "%s\n", pmf, path))
8182 				ret = 1;
8183 				break;
8184 			}
8185 #ifdef	DEBUG
8186 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8187 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8188 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8189 				    path))
8190 #endif
8191 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8192 			break;
8193 		}
8194 	}
8195 	rw_exit(&pm_noinvol_rwlock);
8196 	return (ret);
8197 }
8198 
8199 int
8200 pm_is_cfb(dev_info_t *dip)
8201 {
8202 	return (dip == cfb_dip);
8203 }
8204 
8205 #ifdef	DEBUG
8206 /*
8207  * Return true if all components of the console frame buffer are at
8208  * "normal" power, i.e., fully on.  For the case where the console is not
8209  * a framebuffer, we also return true
8210  */
8211 int
8212 pm_cfb_is_up(void)
8213 {
8214 	return (pm_cfb_comps_off == 0);
8215 }
8216 #endif
8217 
8218 /*
8219  * Prevent scan from powering down the node by incrementing its
8220  * kidsupcnt.
8221  */
8222 void
8223 pm_hold_power(dev_info_t *dip)
8224 {
8225 	e_pm_hold_rele_power(dip, 1);
8226 }
8227 
8228 /*
8229  * Release the hold by decrementing the kidsupcnt, allowing scan
8230  * to power down the node if all conditions are met.
8231  */
8232 void
8233 pm_rele_power(dev_info_t *dip)
8234 {
8235 	e_pm_hold_rele_power(dip, -1);
8236 }
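/*
 * The two routines above are used as a bracketing pair, e.g. (sketch,
 * mirroring the pre/post unconfig code earlier in this file):
 *
 *	pm_hold_power(dip);	-- scan may not power the node down
 *	... operate on the node or its subtree ...
 *	pm_rele_power(dip);	-- scan may power it down again
 */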
8237 
8238 /*
8239  * A wrapper around pm_all_to_normal() to power up a dip
8240  * to its normal level
8241  */
8242 int
8243 pm_powerup(dev_info_t *dip)
8244 {
8245 	PMD_FUNC(pmf, "pm_powerup")
8246 
8247 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8248 	ASSERT(!(servicing_interrupt()));
8249 
8250 	/*
8251 	 * in case this node is not already participating in pm
8252 	 */
8253 	if (!PM_GET_PM_INFO(dip)) {
8254 		if (!DEVI_IS_ATTACHING(dip))
8255 			return (DDI_SUCCESS);
8256 		if (pm_start(dip) != DDI_SUCCESS)
8257 			return (DDI_FAILURE);
8258 		if (!PM_GET_PM_INFO(dip))
8259 			return (DDI_SUCCESS);
8260 	}
8261 
8262 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8263 }
8264 
8265 int
8266 pm_rescan_walk(dev_info_t *dip, void *arg)
8267 {
8268 	_NOTE(ARGUNUSED(arg))
8269 
8270 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8271 		return (DDI_WALK_CONTINUE);
8272 
8273 	/*
8274 	 * Currently the pm_cpr_callb/resume code is the only caller,
8275 	 * and it needs to make sure that a stopped scan gets
8276 	 * reactivated.  Otherwise, the rescan walk needn't reactivate
8277 	 * a stopped scan.
8278 	 */
8279 	pm_scan_init(dip);
8280 
8281 	(void) pm_rescan(dip);
8282 	return (DDI_WALK_CONTINUE);
8283 }
8284 
8285 static dev_info_t *
8286 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8287 {
8288 	dev_info_t *wdip, *pdip;
8289 
8290 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8291 		pdip = ddi_get_parent(wdip);
8292 		if (pdip == dip)
8293 			return (wdip);
8294 	}
8295 	return (NULL);
8296 }
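/*
 * For example (hypothetical tree): with dip = /pci@0 and tdip =
 * /pci@0/scsi@1/sd@0,0, the routine above returns the dip of
 * /pci@0/scsi@1, the child of dip lying on the path down to tdip; it
 * returns NULL when tdip is dip itself.
 */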
8297 
8298 int
8299 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8300     void *arg, void *result)
8301 {
8302 	PMD_FUNC(pmf, "bp_bus_power")
8303 	dev_info_t	*cdip;
8304 	pm_info_t	*cinfo;
8305 	pm_bp_child_pwrchg_t	*bpc;
8306 	pm_sp_misc_t		*pspm;
8307 	pm_bp_nexus_pwrup_t *bpn;
8308 	pm_bp_child_pwrchg_t new_bpc;
8309 	pm_bp_noinvol_t *bpi;
8310 	dev_info_t *tdip;
8311 	char *pathbuf;
8312 	int		ret = DDI_SUCCESS;
8313 	int		errno = 0;
8314 	pm_component_t *cp;
8315 
8316 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8317 	    pm_decode_op(op)))
8318 	switch (op) {
8319 	case BUS_POWER_CHILD_PWRCHG:
8320 		bpc = (pm_bp_child_pwrchg_t *)arg;
8321 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8322 		tdip = bpc->bpc_dip;
8323 		cdip = pm_get_next_descendent(dip, tdip);
8324 		cinfo = PM_GET_PM_INFO(cdip);
8325 		if (cdip != tdip) {
8326 			/*
8327 			 * If the node is an involved parent, it must power
8328 			 * up the child itself as needed.  There is nothing
8329 			 * else the framework can do here.
8330 			 */
8331 			if (PM_WANTS_NOTIFICATION(cdip)) {
8332 				PMD(PMD_SET, ("%s: call bus_power for "
8333 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8334 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8335 				    impl_arg, op, arg, result));
8336 			}
8337 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8338 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8339 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8340 			/*
8341 			 * we presume that the parent needs to be up in
8342 			 * order for the child to change state (either
8343 			 * because it must already be on if the child is on
8344 			 * (and the pm_all_to_normal_nexus() will be a nop),
8345 			 * or because it will need to be on for the child
8346 			 * to come on); so we make the call regardless
8347 			 */
8348 			pm_hold_power(cdip);
8349 			if (cinfo) {
8350 				pm_canblock_t canblock = pspm->pspm_canblock;
8351 				ret = pm_all_to_normal_nexus(cdip, canblock);
8352 				if (ret != DDI_SUCCESS) {
8353 					pm_rele_power(cdip);
8354 					return (ret);
8355 				}
8356 			}
8357 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8358 			    PM_DEVICE(cdip)))
8359 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8360 			    result);
8361 			pm_rele_power(cdip);
8362 		} else {
8363 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8364 			    result);
8365 		}
8366 		return (ret);
8367 
8368 	case BUS_POWER_NEXUS_PWRUP:
8369 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8370 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8371 
8372 		if (!e_pm_valid_info(dip, NULL) ||
8373 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8374 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8375 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8376 			    pmf, PM_DEVICE(dip)))
8377 			*pspm->pspm_errnop = EIO;
8378 			*(int *)result = DDI_FAILURE;
8379 			return (DDI_FAILURE);
8380 		}
8381 
8382 		ASSERT(bpn->bpn_dip == dip);
8383 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8384 		    PM_DEVICE(dip)))
8385 		new_bpc.bpc_dip = dip;
8386 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8387 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8388 		new_bpc.bpc_comp = bpn->bpn_comp;
8389 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8390 		new_bpc.bpc_nlevel = bpn->bpn_level;
8391 		new_bpc.bpc_private = bpn->bpn_private;
8392 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8393 		    PM_LEVEL_UPONLY;
8394 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8395 		    &errno;
8396 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8397 		    (void *)&new_bpc, result);
8398 		kmem_free(pathbuf, MAXPATHLEN);
8399 		return (ret);
8400 
8401 	case BUS_POWER_NOINVOL:
8402 		bpi = (pm_bp_noinvol_t *)arg;
8403 		tdip = bpi->bpni_dip;
8404 		cdip = pm_get_next_descendent(dip, tdip);
8405 
8406 		/* In case of rem_drv, the leaf node has been removed */
8407 		if (cdip == NULL)
8408 			return (DDI_SUCCESS);
8409 
8410 		cinfo = PM_GET_PM_INFO(cdip);
8411 		if (cdip != tdip) {
8412 			if (PM_WANTS_NOTIFICATION(cdip)) {
8413 				PMD(PMD_NOINVOL,
8414 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8415 				    pmf, PM_DEVICE(cdip)))
8416 				ret = (*PM_BUS_POWER_FUNC(cdip))
8417 				    (cdip, NULL, op, arg, result);
8418 				if ((cinfo) && (ret == DDI_SUCCESS))
8419 					(void) pm_noinvol_update_node(cdip,
8420 					    bpi);
8421 				return (ret);
8422 			} else {
8423 				PMD(PMD_NOINVOL,
8424 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8425 				    PM_DEVICE(cdip)))
8426 				ret = pm_busop_bus_power(cdip, NULL, op,
8427 				    arg, result);
8428 				/*
8429 				 * Update the current node.
8430 				 */
8431 				if ((cinfo) && (ret == DDI_SUCCESS))
8432 					(void) pm_noinvol_update_node(cdip,
8433 					    bpi);
8434 				return (ret);
8435 			}
8436 		} else {
8437 			/*
8438 			 * For attach, detach, power up:
8439 			 * Do nothing for leaf node since its
8440 			 * counts are already updated.
8441 			 * For CFB and driver removal, the path and
8442 			 * target dip passed in go up to and include the
8443 			 * immediate ancestor, so we need to do the update.
8444 			 */
8445 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8446 			    "reached\n", pmf, PM_DEVICE(cdip)))
8447 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8448 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8449 				(void) pm_noinvol_update_node(cdip, bpi);
8450 			return (DDI_SUCCESS);
8451 		}
8452 
8453 	default:
8454 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8455 		return (DDI_FAILURE);
8456 	}
8457 }
8458 
8459 static int
8460 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8461     void *arg, void *resultp)
8462 {
8463 	_NOTE(ARGUNUSED(impl_arg))
8464 	PMD_FUNC(pmf, "bp_set_power")
8465 	pm_ppm_devlist_t *devl;
8466 	int clevel, circ;
8467 #ifdef	DEBUG
8468 	int circ_db, ccirc_db;
8469 #endif
8470 	int ret = DDI_SUCCESS;
8471 	dev_info_t *cdip;
8472 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8473 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8474 	pm_canblock_t canblock = pspm->pspm_canblock;
8475 	int scan = pspm->pspm_scan;
8476 	int comp = bpc->bpc_comp;
8477 	int olevel = bpc->bpc_olevel;
8478 	int nlevel = bpc->bpc_nlevel;
8479 	int comps_off_incr = 0;
8480 	dev_info_t *pdip = ddi_get_parent(dip);
8481 	int dodeps;
8482 	int direction = pspm->pspm_direction;
8483 	int *errnop = pspm->pspm_errnop;
8484 	char *dir = pm_decode_direction(direction);
8485 	int *iresp = (int *)resultp;
8486 	time_t	idletime, thresh;
8487 	pm_component_t *cp = PM_CP(dip, comp);
8488 	int work_type;
8489 
8490 	*iresp = DDI_SUCCESS;
8491 	*errnop = 0;
8492 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8493 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8494 	    pm_decode_op(op)))
8495 
8496 	/*
8497 	 * The following set of conditions indicate we are here to handle a
8498 	 * driver's pm_[raise|lower]_power request, but the device is being
8499 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8500 	 * we want to pm_block and pass a status back to the caller based
8501 	 * on whether the controlling process's next activity on the device
8502 	 * matches the current request or not.  This distinction tells
8503 	 * downstream functions to avoid calling into a driver or changing
8504 	 * the framework's power state.  To actually block, we need:
8505 	 *
8506 	 * PM_ISDIRECT(dip)
8507 	 *	no reason to block unless a process is directly controlling dev
8508 	 * direction != PM_LEVEL_EXACT
8509 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8510 	 * !pm_processes_stopped
8511 	 *	don't block if the controlling proc has already been stopped for cpr
8512 	 * canblock != PM_CANBLOCK_BYPASS
8513 	 *	our caller must not have explicitly prevented blocking
8514 	 */
8515 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8516 		PM_LOCK_DIP(dip);
8517 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8518 			/* releases dip lock */
8519 			ret = pm_busop_match_request(dip, bpc);
8520 			if (ret == EAGAIN) {
8521 				PM_LOCK_DIP(dip);
8522 				continue;
8523 			}
8524 			return (*iresp = ret);
8525 		}
8526 		PM_UNLOCK_DIP(dip);
8527 	}
8528 	/* BC device is never scanned, so power will stick until we are done */
8529 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8530 	    direction != PM_LEVEL_DOWNONLY) {
8531 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8532 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8533 		    canblock, 0, resultp) != DDI_SUCCESS) {
8534 			/* *resultp set by pm_set_power */
8535 			return (DDI_FAILURE);
8536 		}
8537 	}
8538 	if (PM_WANTS_NOTIFICATION(pdip)) {
8539 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8540 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8541 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8542 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8543 		if (ret != DDI_SUCCESS) {
8544 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8545 			    pmf, PM_DEVICE(pdip)))
8546 			return (DDI_FAILURE);
8547 		}
8548 	} else {
8549 		/*
8550 		 * Since we don't know what the actual power level is,
8551 		 * we place a power hold on the parent no matter what
8552 		 * component and level is changing.
8553 		 */
8554 		pm_hold_power(pdip);
8555 	}
8556 	PM_LOCK_POWER(dip, &circ);
8557 	clevel = PM_CURPOWER(dip, comp);
8558 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8559 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8560 	    clevel, dir))
8561 	switch (direction) {
8562 	case PM_LEVEL_UPONLY:
8563 		/* Powering up */
8564 		if (clevel >= nlevel) {
8565 			PMD(PMD_SET, ("%s: current level is already "
8566 			    "at or above the requested level.\n", pmf))
8567 			*iresp = DDI_SUCCESS;
8568 			ret = DDI_SUCCESS;
8569 			goto post_notify;
8570 		}
8571 		break;
8572 	case PM_LEVEL_EXACT:
8573 		/* specific level request */
8574 		if (clevel == nlevel && !PM_ISBC(dip)) {
8575 			PMD(PMD_SET, ("%s: current level is already "
8576 			    "at the requested level.\n", pmf))
8577 			*iresp = DDI_SUCCESS;
8578 			ret = DDI_SUCCESS;
8579 			goto post_notify;
8580 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8581 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8582 			if (!pm_cfb_enabled) {
8583 				PMD(PMD_ERROR | PMD_CFB,
8584 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8585 				*errnop = EINVAL;
8586 				*iresp = DDI_FAILURE;
8587 				ret = DDI_FAILURE;
8588 				goto post_notify;
8589 			}
8590 			mutex_enter(&pm_cfb_lock);
8591 			while (cfb_inuse) {
8592 				mutex_exit(&pm_cfb_lock);
8593 				if (delay_sig(1) == EINTR) {
8594 					ret = DDI_FAILURE;
8595 					*iresp = DDI_FAILURE;
8596 					*errnop = EINTR;
8597 					goto post_notify;
8598 				}
8599 				mutex_enter(&pm_cfb_lock);
8600 			}
8601 			mutex_exit(&pm_cfb_lock);
8602 		}
8603 		break;
8604 	case PM_LEVEL_DOWNONLY:
8605 		/* Powering down */
8606 		thresh = cur_threshold(dip, comp);
8607 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8608 		if (scan && ((PM_KUC(dip) != 0) ||
8609 		    (cp->pmc_busycount > 0) ||
8610 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8611 #ifdef	DEBUG
8612 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8613 				PMD(PMD_SET, ("%s: scan failed: "
8614 				    "kidsupcnt != 0\n", pmf))
8615 			if (cp->pmc_busycount > 0)
8616 				PMD(PMD_SET, ("%s: scan failed: "
8617 				    "device become busy\n", pmf))
8618 			if (idletime < thresh)
8619 				PMD(PMD_SET, ("%s: scan failed: device "
8620 				    "hasn't been idle long enough\n", pmf))
8621 #endif
8622 			*iresp = DDI_FAILURE;
8623 			*errnop = EBUSY;
8624 			ret = DDI_FAILURE;
8625 			goto post_notify;
8626 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8627 			PMD(PMD_SET, ("%s: current level is already at "
8628 			    "or below the requested level.\n", pmf))
8629 			*iresp = DDI_SUCCESS;
8630 			ret = DDI_SUCCESS;
8631 			goto post_notify;
8632 		}
8633 		break;
8634 	}
8635 
8636 	if (PM_IS_CFB(dip) && (comps_off_incr =
8637 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8638 		/*
8639 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8640 		 * component from full power.  Remember that we tried to
8641 		 * lower power in case it fails and we need to back out
8642 		 * the adjustment.
8643 		 */
8644 		update_comps_off(comps_off_incr, dip);
8645 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8646 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8647 		    pm_cfb_comps_off))
8648 	}
8649 
8650 	if ((*iresp = power_dev(dip,
8651 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8652 #ifdef DEBUG
8653 		/*
8654 		 * All descendents of this node should already be powered off.
8655 		 */
8656 		if (PM_CURPOWER(dip, comp) == 0) {
8657 			pm_desc_pwrchk_t pdpchk;
8658 			pdpchk.pdpc_dip = dip;
8659 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8660 			ndi_devi_enter(dip, &circ_db);
8661 			for (cdip = ddi_get_child(dip); cdip != NULL;
8662 			    cdip = ddi_get_next_sibling(cdip)) {
8663 				ndi_devi_enter(cdip, &ccirc_db);
8664 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8665 				    (void *)&pdpchk);
8666 				ndi_devi_exit(cdip, ccirc_db);
8667 			}
8668 			ndi_devi_exit(dip, circ_db);
8669 		}
8670 #endif
8671 		/*
8672 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8673 		 * back up to full power.
8674 		 */
8675 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8676 			update_comps_off(comps_off_incr, dip);
8677 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8678 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8679 			    comp, clevel, nlevel, pm_cfb_comps_off))
8680 		}
8681 		dodeps = 0;
8682 		if (POWERING_OFF(clevel, nlevel)) {
8683 			if (PM_ISBC(dip)) {
8684 				dodeps = (comp == 0);
8685 			} else {
8686 				int i;
8687 				dodeps = 1;
8688 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8689 					/* if some component still on */
8690 					if (PM_CURPOWER(dip, i)) {
8691 						dodeps = 0;
8692 						break;
8693 					}
8694 				}
8695 			}
8696 			if (dodeps)
8697 				work_type = PM_DEP_WK_POWER_OFF;
8698 		} else if (POWERING_ON(clevel, nlevel)) {
8699 			if (PM_ISBC(dip)) {
8700 				dodeps = (comp == 0);
8701 			} else {
8702 				int i;
8703 				dodeps = 1;
8704 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8705 					if (i == comp)
8706 						continue;
8707 					if (PM_CURPOWER(dip, i) > 0) {
8708 						dodeps = 0;
8709 						break;
8710 					}
8711 				}
8712 			}
8713 			if (dodeps)
8714 				work_type = PM_DEP_WK_POWER_ON;
8715 		}
8716 
8717 		if (dodeps) {
8718 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8719 
8720 			(void) ddi_pathname(dip, pathbuf);
8721 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8722 			    PM_DEP_NOWAIT, NULL, 0);
8723 			kmem_free(pathbuf, MAXPATHLEN);
8724 		}
8725 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8726 			int old;
8727 
8728 			/* If old power cached during deadlock, use it. */
8729 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8730 			    cp->pmc_phc_pwr : olevel);
8731 			mutex_enter(&pm_rsvp_lock);
8732 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8733 			    old, canblock);
8734 			pm_enqueue_notify_others(&devl, canblock);
8735 			mutex_exit(&pm_rsvp_lock);
8736 		}
8737 
8738 		/*
8739 		 * If we are coming from a scan, don't do it again,
8740 		 * else we can have infinite loops.
8741 		 */
8742 		if (!scan)
8743 			pm_rescan(dip);
8744 	} else {
8745 		/* if we incremented pm_cfb_comps_off, but the power change failed */
8746 		if (comps_off_incr > 0) {
8747 			update_comps_off(-comps_off_incr, dip);
8748 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8749 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8750 			    comp, clevel, nlevel, pm_cfb_comps_off))
8751 		}
8752 		*errnop = EIO;
8753 	}
8754 
8755 post_notify:
8756 	/*
8757 	 * This thread may have been in deadlock with pm_power_has_changed.
8758 	 * Before releasing power lock, clear the flag which marks this
8759 	 * condition.
8760 	 */
8761 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8762 
8763 	/*
8764 	 * Update the old power level in the bus power structure with the
8765 	 * actual power level before the transition was made to the new level.
8766 	 * Some involved parents depend on this information to keep track of
8767 	 * their children's power transition.
8768 	 */
8769 	if (*iresp != DDI_FAILURE)
8770 		bpc->bpc_olevel = clevel;
8771 
8772 	if (PM_WANTS_NOTIFICATION(pdip)) {
8773 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8774 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8775 		PM_UNLOCK_POWER(dip, circ);
8776 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8777 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8778 		    PM_DEVICE(dip), ret))
8779 	} else {
8780 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8781 		PM_UNLOCK_POWER(dip, circ);
8782 		/*
8783 		 * Now that we know what power transition has occurred
8784 		 * (if any), release the power hold.  Leave the hold
8785 		 * in effect in the case of OFF->ON transition.
8786 		 */
8787 		if (!(clevel == 0 && nlevel > 0 &&
8788 		    (!PM_ISBC(dip) || comp == 0)))
8789 			pm_rele_power(pdip);
8790 		/*
8791 		 * If the power transition was an ON->OFF transition,
8792 		 * remove the power hold from the parent.
8793 		 */
8794 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8795 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8796 			pm_rele_power(pdip);
8797 	}
8798 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8799 		return (DDI_FAILURE);
8800 	else
8801 		return (DDI_SUCCESS);
8802 }
8803 
8804 /*
8805  * If an app (SunVTS or Xsun) has taken control of the device, block
8806  * until it releases control or makes the requested power level change,
8807  * unless we have other instructions about blocking.  Returns DDI_SUCCESS,
8808  * DDI_FAILURE, or EAGAIN (the owner released the device from directpm).
8809  */
8810 static int
8811 pm_busop_match_request(dev_info_t *dip, void *arg)
8812 {
8813 	PMD_FUNC(pmf, "bp_match_request")
8814 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8815 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8816 	int comp = bpc->bpc_comp;
8817 	int nlevel = bpc->bpc_nlevel;
8818 	pm_canblock_t canblock = pspm->pspm_canblock;
8819 	int direction = pspm->pspm_direction;
8820 	int clevel, circ;
8821 
8822 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8823 	PM_LOCK_POWER(dip, &circ);
8824 	clevel = PM_CURPOWER(dip, comp);
8825 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8826 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8827 	if (direction == PM_LEVEL_UPONLY) {
8828 		if (clevel >= nlevel) {
8829 			PM_UNLOCK_POWER(dip, circ);
8830 			PM_UNLOCK_DIP(dip);
8831 			return (DDI_SUCCESS);
8832 		}
8833 	} else if (clevel == nlevel) {
8834 		PM_UNLOCK_POWER(dip, circ);
8835 		PM_UNLOCK_DIP(dip);
8836 		return (DDI_SUCCESS);
8837 	}
8838 	if (canblock == PM_CANBLOCK_FAIL) {
8839 		PM_UNLOCK_POWER(dip, circ);
8840 		PM_UNLOCK_DIP(dip);
8841 		return (DDI_FAILURE);
8842 	}
8843 	if (canblock == PM_CANBLOCK_BLOCK) {
8844 		/*
8845 		 * To avoid a deadlock, we must not hold the
8846 		 * power lock when we pm_block.
8847 		 */
8848 		PM_UNLOCK_POWER(dip, circ);
8849 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8850 		/* pm_block releases dip lock */
8851 		switch (pm_block(dip, comp, nlevel, clevel)) {
8852 		case PMP_RELEASE:
8853 			return (EAGAIN);
8854 		case PMP_SUCCEED:
8855 			return (DDI_SUCCESS);
8856 		case PMP_FAIL:
8857 			return (DDI_FAILURE);
8858 		}
8859 	} else {
8860 		ASSERT(0);
8861 	}
8862 	_NOTE(NOTREACHED);
8863 	return (DDI_FAILURE);	/* keep gcc happy */
8864 }
8865 
8866 static int
8867 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8868 {
8869 	PMD_FUNC(pmf, "all_to_normal_nexus")
8870 	int		*normal;
8871 	int		i, ncomps;
8872 	size_t		size;
8873 	int		changefailed = 0;
8874 	int		ret, result = DDI_SUCCESS;
8875 	pm_bp_nexus_pwrup_t	bpn;
8876 	pm_sp_misc_t	pspm;
8877 
8878 	ASSERT(PM_GET_PM_INFO(dip));
8879 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8880 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8881 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8882 		return (DDI_FAILURE);
8883 	}
8884 	ncomps = PM_NUMCMPTS(dip);
8885 	for (i = 0; i < ncomps; i++) {
8886 		bpn.bpn_dip = dip;
8887 		bpn.bpn_comp = i;
8888 		bpn.bpn_level = normal[i];
8889 		pspm.pspm_canblock = canblock;
8890 		pspm.pspm_scan = 0;
8891 		bpn.bpn_private = &pspm;
8892 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8893 		    (void *)&bpn, (void *)&result);
8894 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8895 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8896 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8897 			    i, normal[i], result))
8898 			changefailed++;
8899 		}
8900 	}
8901 	kmem_free(normal, size);
8902 	if (changefailed) {
8903 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8904 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8905 		return (DDI_FAILURE);
8906 	}
8907 	return (DDI_SUCCESS);
8908 }
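
/*
 * Hypothetical usage sketch (illustration only, not from the original
 * source): a nexus driver resuming in attach(9E) might restore all of
 * its components to normal power with:
 *
 *	if (pm_all_to_normal_nexus(dip, PM_CANBLOCK_BLOCK) != DDI_SUCCESS)
 *		cmn_err(CE_WARN, "!%s%d: power-up failed",
 *		    ddi_driver_name(dip), ddi_get_instance(dip));
 */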
8909 
8910 int
8911 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8912     dev_info_t *tdip)
8913 {
8914 	PMD_FUNC(pmf, "noinvol_update")
8915 	pm_bp_noinvol_t args;
8916 	int ret;
8917 	int result = DDI_SUCCESS;
8918 
8919 	args.bpni_path = path;
8920 	args.bpni_dip = tdip;
8921 	args.bpni_cmd = subcmd;
8922 	args.bpni_wasvolpmd = wasvolpmd;
8923 	args.bpni_volpmd = volpmd;
8924 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8925 	    "volpmd %d wasvolpmd %d\n", pmf,
8926 	    path, (void *)tdip, subcmd, volpmd, wasvolpmd))
8927 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8928 	    &args, &result);
8929 	return (ret);
8930 }
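
/*
 * Hypothetical usage sketch (illustration only; wasvolpmd here is a
 * caller-supplied flag): a caller about to detach a no-involuntary-pm
 * node might notify its ancestors with:
 *
 *	char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 *	(void) ddi_pathname(dip, path);
 *	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, wasvolpmd,
 *	    path, dip);
 *	kmem_free(path, MAXPATHLEN);
 */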
8931 
8932 void
8933 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
8934 {
8935 	PMD_FUNC(pmf, "noinvol_update_node")
8936 
8937 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
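	/*
	 * Each subcommand below adjusts this node's count of
	 * no-involuntary-pm descendants (devi_pm_noinvolpm) and, when the
	 * affected node had been voluntarily power managed
	 * (bpni_wasvolpmd), its devi_pm_volpmd count, under PM_LOCK_DIP.
	 */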
8938 	switch (req->bpni_cmd) {
8939 	case PM_BP_NOINVOL_ATTACH:
8940 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH %s@%s(%s#%d) "
8941 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8942 		    DEVI(dip)->devi_pm_noinvolpm,
8943 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8944 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8945 		PM_LOCK_DIP(dip);
8946 		DEVI(dip)->devi_pm_noinvolpm--;
8947 		if (req->bpni_wasvolpmd) {
8948 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
8949 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8950 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8951 			    DEVI(dip)->devi_pm_volpmd - 1))
8952 			if (DEVI(dip)->devi_pm_volpmd)
8953 				DEVI(dip)->devi_pm_volpmd--;
8954 		}
8955 		PM_UNLOCK_DIP(dip);
8956 		break;
8957 
8958 	case PM_BP_NOINVOL_DETACH:
8959 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
8960 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
8961 		    DEVI(dip)->devi_pm_noinvolpm,
8962 		    DEVI(dip)->devi_pm_noinvolpm + 1))
8963 		PM_LOCK_DIP(dip);
8964 		DEVI(dip)->devi_pm_noinvolpm++;
8965 		if (req->bpni_wasvolpmd) {
8966 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
8967 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8968 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8969 			    DEVI(dip)->devi_pm_volpmd + 1))
8970 			DEVI(dip)->devi_pm_volpmd++;
8971 		}
8972 		PM_UNLOCK_DIP(dip);
8973 		break;
8974 
8975 	case PM_BP_NOINVOL_REMDRV:
8976 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8977 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8978 		    DEVI(dip)->devi_pm_noinvolpm,
8979 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8980 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8981 		PM_LOCK_DIP(dip);
8982 		DEVI(dip)->devi_pm_noinvolpm--;
8983 		if (req->bpni_wasvolpmd) {
8984 			PMD(PMD_NOINVOL,
8985 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8986 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
8987 			    DEVI(dip)->devi_pm_volpmd,
8988 			    DEVI(dip)->devi_pm_volpmd - 1))
8989 			/*
8990 			 * A power up could have come in meanwhile and
8991 			 * already cleared volpmd; decrement it only if
8992 			 * it is still nonzero.
8993 			 */
8994 			if (DEVI(dip)->devi_pm_volpmd)
8995 				DEVI(dip)->devi_pm_volpmd--;
8996 		}
8997 		PM_UNLOCK_DIP(dip);
8998 		break;
8999 
9000 	case PM_BP_NOINVOL_CFB:
9001 		PMD(PMD_NOINVOL,
9002 		    ("%s: PM_BP_NOINVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9003 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9004 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9005 		PM_LOCK_DIP(dip);
9006 		DEVI(dip)->devi_pm_noinvolpm++;
9007 		PM_UNLOCK_DIP(dip);
9008 		break;
9009 
9010 	case PM_BP_NOINVOL_POWER:
9011 		PMD(PMD_NOINVOL,
9012 		    ("%s: PM_BP_NOINVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9013 		    pmf, PM_DEVICE(dip),
9014 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9015 		    req->bpni_volpmd))
9016 		PM_LOCK_DIP(dip);
9017 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9018 		PM_UNLOCK_DIP(dip);
9019 		break;
9020 
9021 	default:
9022 		break;
9023 	}
9024 
9025 }
9026 
9027 #ifdef DEBUG
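/*
 * ddi_walk_devs(9F) callback: flag any component of a descendant that
 * is still powered up while its ancestor (pdpc_dip) powers off; panics
 * if no parent involvement was expected, otherwise just warns.
 */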
9028 static int
9029 pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
9030 {
9031 	PMD_FUNC(pmf, "desc_pwrchk")
9032 	pm_desc_pwrchk_t *pdpchk = (pm_desc_pwrchk_t *)arg;
9033 	pm_info_t *info = PM_GET_PM_INFO(dip);
9034 	int i, curpwr, ce_level;
9035 
9036 	if (!info)
9037 		return (DDI_WALK_CONTINUE);
9038 
9039 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9040 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
9041 		if ((curpwr = PM_CURPOWER(dip, i)) == 0)
9042 			continue;
9043 		ce_level = (pdpchk->pdpc_par_involved == 0) ? CE_PANIC :
9044 		    CE_WARN;
9045 		PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powering off while desc "
9046 		    "%s@%s(%s#%d)[%d] is at %d\n", pmf,
9047 		    PM_DEVICE(pdpchk->pdpc_dip), PM_DEVICE(dip), i, curpwr))
9048 		cmn_err(ce_level, "!device %s@%s(%s#%d) is powered on, "
9049 		    "while its ancestor, %s@%s(%s#%d), is powering off!",
9050 		    PM_DEVICE(dip), PM_DEVICE(pdpchk->pdpc_dip));
9051 	}
9052 	return (DDI_WALK_CONTINUE);
9053 }
9054 #endif
9055 
9056 /*
9057  * Record the fact that one thread is borrowing the lock on a device node.
9058  * Use is restricted to the case where the lending thread will block until
9059  * the borrowing thread (always curthread) completes.
9060  */
9061 void
9062 pm_borrow_lock(kthread_t *lender)
9063 {
9064 	lock_loan_t *prev = &lock_loan_head;
9065 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9066 
9067 	cur->pmlk_borrower = curthread;
9068 	cur->pmlk_lender = lender;
9069 	mutex_enter(&pm_loan_lock);
9070 	cur->pmlk_next = prev->pmlk_next;
9071 	prev->pmlk_next = cur;
9072 	mutex_exit(&pm_loan_lock);
9073 }
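
/*
 * Hypothetical protocol sketch (illustration only): the lender must
 * block for the entire loan, e.g.:
 *
 *	lender thread			borrower thread (curthread)
 *	PM_LOCK_DIP(dip);
 *	hand off, block  ------------->	pm_borrow_lock(lender);
 *					... act as dip lock owner ...
 *					pm_return_lock();
 *	resume  <----------------------
 *	PM_UNLOCK_DIP(dip);
 */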
9074 
9075 /*
9076  * Return the borrowed lock.  A thread can borrow only one.
9077  */
9078 void
9079 pm_return_lock(void)
9080 {
9081 	lock_loan_t *cur;
9082 	lock_loan_t *prev = &lock_loan_head;
9083 
9084 	mutex_enter(&pm_loan_lock);
9085 	ASSERT(prev->pmlk_next != NULL);
9086 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9087 		if (cur->pmlk_borrower == curthread)
9088 			break;
9089 
9090 	ASSERT(cur != NULL);
9091 	prev->pmlk_next = cur->pmlk_next;
9092 	mutex_exit(&pm_loan_lock);
9093 	kmem_free(cur, sizeof (*cur));
9094 }
9095 }